vsock/virtio: support to send non-linear skb
[linux-2.6-block.git] / net / vmw_vsock / virtio_transport_common.c
CommitLineData
7a338472 1// SPDX-License-Identifier: GPL-2.0-only
06a8fc78
AH
2/*
3 * common code for virtio vsock
4 *
5 * Copyright (C) 2013-2015 Red Hat, Inc.
6 * Author: Asias He <asias@redhat.com>
7 * Stefan Hajnoczi <stefanha@redhat.com>
06a8fc78
AH
8 */
9#include <linux/spinlock.h>
10#include <linux/module.h>
174cd4b1 11#include <linux/sched/signal.h>
06a8fc78
AH
12#include <linux/ctype.h>
13#include <linux/list.h>
06a8fc78 14#include <linux/virtio_vsock.h>
82dfb540 15#include <uapi/linux/vsockmon.h>
06a8fc78
AH
16
17#include <net/sock.h>
18#include <net/af_vsock.h>
19
20#define CREATE_TRACE_POINTS
21#include <trace/events/vsock_virtio_transport_common.h>
22
23/* How long to wait for graceful shutdown of a connection */
24#define VSOCK_CLOSE_TIMEOUT (8 * HZ)
25
473c7391
SG
26/* Threshold for detecting small packets to copy */
27#define GOOD_COPY_LEN 128
28
daabfbca
SG
29static const struct virtio_transport *
30virtio_transport_get_ops(struct vsock_sock *vsk)
06a8fc78 31{
daabfbca 32 const struct vsock_transport *t = vsock_core_get_transport(vsk);
06a8fc78 33
4aaf5961
SG
34 if (WARN_ON(!t))
35 return NULL;
36
06a8fc78
AH
37 return container_of(t, struct virtio_transport, transport);
38}
39
71dc9ec9
BE
/* Allocate and fill a fully-formed virtio-vsock packet skb.
 *
 * Returns a new packet on success, otherwise returns NULL (e.g. on
 * allocation failure or if copying the payload from @info->msg fails).
 */
static struct sk_buff *
virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *info,
			   size_t len,
			   u32 src_cid,
			   u32 src_port,
			   u32 dst_cid,
			   u32 dst_port)
{
	/* Reserve headroom for the virtio-vsock header in front of @len
	 * payload bytes.
	 */
	const size_t skb_len = VIRTIO_VSOCK_SKB_HEADROOM + len;
	struct virtio_vsock_hdr *hdr;
	struct sk_buff *skb;
	void *payload;
	int err;

	skb = virtio_vsock_alloc_skb(skb_len, GFP_KERNEL);
	if (!skb)
		return NULL;

	/* The on-wire header is little-endian */
	hdr = virtio_vsock_hdr(skb);
	hdr->type = cpu_to_le16(info->type);
	hdr->op = cpu_to_le16(info->op);
	hdr->src_cid = cpu_to_le64(src_cid);
	hdr->dst_cid = cpu_to_le64(dst_cid);
	hdr->src_port = cpu_to_le32(src_port);
	hdr->dst_port = cpu_to_le32(dst_port);
	hdr->flags = cpu_to_le32(info->flags);
	hdr->len = cpu_to_le32(len);

	if (info->msg && len > 0) {
		payload = skb_put(skb, len);
		err = memcpy_from_msg(payload, info->msg, len);
		if (err)
			goto out;

		/* For SEQPACKET, mark the last fragment of a message with
		 * EOM, and propagate the user's MSG_EOR as a record marker.
		 */
		if (msg_data_left(info->msg) == 0 &&
		    info->type == VIRTIO_VSOCK_TYPE_SEQPACKET) {
			hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM);

			if (info->msg->msg_flags & MSG_EOR)
				hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
		}
	}

	if (info->reply)
		virtio_vsock_skb_set_reply(skb);

	trace_virtio_transport_alloc_pkt(src_cid, src_port,
					 dst_cid, dst_port,
					 len,
					 info->type,
					 info->op,
					 info->flags);

	/* Tie the skb to the socket for memory accounting; fails only if
	 * the socket is already being torn down (sk_refcnt == 0).
	 */
	if (info->vsk && !skb_set_owner_sk_safe(skb, sk_vsock(info->vsk))) {
		WARN_ONCE(1, "failed to allocate skb on vsock socket with sk_refcnt == 0\n");
		goto out;
	}

	return skb;

out:
	kfree_skb(skb);
	return NULL;
}
06a8fc78 108
82dfb540
GG
/* Packet capture: build a vsockmon skb (monitor header + virtio-vsock
 * header + payload) describing the packet @opaque for delivery to taps.
 */
static struct sk_buff *virtio_transport_build_skb(void *opaque)
{
	struct virtio_vsock_hdr *pkt_hdr;
	struct sk_buff *pkt = opaque;
	struct af_vsockmon_hdr *hdr;
	struct sk_buff *skb;
	size_t payload_len;
	void *payload_buf;

	/* A packet could be split to fit the RX buffer, so we can retrieve
	 * the payload length from the header and the buffer pointer taking
	 * care of the offset in the original packet.
	 */
	pkt_hdr = virtio_vsock_hdr(pkt);
	payload_len = pkt->len;
	payload_buf = pkt->data;

	skb = alloc_skb(sizeof(*hdr) + sizeof(*pkt_hdr) + payload_len,
			GFP_ATOMIC);
	if (!skb)
		return NULL;

	hdr = skb_put(skb, sizeof(*hdr));

	/* pkt->hdr is little-endian so no need to byteswap here */
	hdr->src_cid = pkt_hdr->src_cid;
	hdr->src_port = pkt_hdr->src_port;
	hdr->dst_cid = pkt_hdr->dst_cid;
	hdr->dst_port = pkt_hdr->dst_port;

	hdr->transport = cpu_to_le16(AF_VSOCK_TRANSPORT_VIRTIO);
	hdr->len = cpu_to_le16(sizeof(*pkt_hdr));
	memset(hdr->reserved, 0, sizeof(hdr->reserved));

	/* Map the virtio-vsock operation onto the generic vsockmon op */
	switch (le16_to_cpu(pkt_hdr->op)) {
	case VIRTIO_VSOCK_OP_REQUEST:
	case VIRTIO_VSOCK_OP_RESPONSE:
		hdr->op = cpu_to_le16(AF_VSOCK_OP_CONNECT);
		break;
	case VIRTIO_VSOCK_OP_RST:
	case VIRTIO_VSOCK_OP_SHUTDOWN:
		hdr->op = cpu_to_le16(AF_VSOCK_OP_DISCONNECT);
		break;
	case VIRTIO_VSOCK_OP_RW:
		hdr->op = cpu_to_le16(AF_VSOCK_OP_PAYLOAD);
		break;
	case VIRTIO_VSOCK_OP_CREDIT_UPDATE:
	case VIRTIO_VSOCK_OP_CREDIT_REQUEST:
		hdr->op = cpu_to_le16(AF_VSOCK_OP_CONTROL);
		break;
	default:
		hdr->op = cpu_to_le16(AF_VSOCK_OP_UNKNOWN);
		break;
	}

	skb_put_data(skb, pkt_hdr, sizeof(*pkt_hdr));

	if (payload_len) {
		skb_put_data(skb, payload_buf, payload_len);
	}

	return skb;
}
173
/* Deliver @skb to any attached packet-capture (vsockmon) devices.
 * The tap-delivered flag ensures each packet is captured at most once,
 * even if this function is called again on the same skb.
 */
void virtio_transport_deliver_tap_pkt(struct sk_buff *skb)
{
	if (virtio_vsock_skb_tap_delivered(skb))
		return;

	vsock_deliver_tap(virtio_transport_build_skb, skb);
	virtio_vsock_skb_set_tap_delivered(skb);
}
EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
183
e4b1ef15
AK
184static u16 virtio_transport_get_type(struct sock *sk)
185{
186 if (sk->sk_type == SOCK_STREAM)
187 return VIRTIO_VSOCK_TYPE_STREAM;
188 else
189 return VIRTIO_VSOCK_TYPE_SEQPACKET;
190}
191
4aaf5961
SG
/* This function can only be used on connecting/connected sockets,
 * since a socket assigned to a transport is required.
 *
 * Do not use on listener sockets!
 *
 * Splits @info->pkt_len bytes into packets of at most
 * VIRTIO_VSOCK_MAX_PKT_BUF_SIZE, consuming tx credit; unused credit is
 * returned on partial failure. Returns bytes sent, or a negative errno
 * if nothing was sent.
 */
static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
					  struct virtio_vsock_pkt_info *info)
{
	u32 src_cid, src_port, dst_cid, dst_port;
	const struct virtio_transport *t_ops;
	struct virtio_vsock_sock *vvs;
	u32 pkt_len = info->pkt_len;
	u32 rest_len;
	int ret;

	info->type = virtio_transport_get_type(sk_vsock(vsk));

	t_ops = virtio_transport_get_ops(vsk);
	if (unlikely(!t_ops))
		return -EFAULT;

	src_cid = t_ops->transport.get_local_cid();
	src_port = vsk->local_addr.svm_port;
	/* info->remote_cid overrides the connected peer address (used for
	 * control packets addressed explicitly).
	 */
	if (!info->remote_cid) {
		dst_cid	= vsk->remote_addr.svm_cid;
		dst_port = vsk->remote_addr.svm_port;
	} else {
		dst_cid = info->remote_cid;
		dst_port = info->remote_port;
	}

	vvs = vsk->trans;

	/* virtio_transport_get_credit might return less than pkt_len credit */
	pkt_len = virtio_transport_get_credit(vvs, pkt_len);

	/* Do not send zero length OP_RW pkt */
	if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
		return pkt_len;

	rest_len = pkt_len;

	do {
		struct sk_buff *skb;
		size_t skb_len;

		skb_len = min_t(u32, VIRTIO_VSOCK_MAX_PKT_BUF_SIZE, rest_len);

		skb = virtio_transport_alloc_skb(info, skb_len,
						 src_cid, src_port,
						 dst_cid, dst_port);
		if (!skb) {
			ret = -ENOMEM;
			break;
		}

		/* Stamp current credit info into the outgoing header */
		virtio_transport_inc_tx_pkt(vvs, skb);

		ret = t_ops->send_pkt(skb);
		if (ret < 0)
			break;

		/* Both virtio and vhost 'send_pkt()' returns 'skb_len',
		 * but for reliability use 'ret' instead of 'skb_len'.
		 * Also if partial send happens (e.g. 'ret' != 'skb_len')
		 * somehow, we break this loop, but account such returned
		 * value in 'virtio_transport_put_credit()'.
		 */
		rest_len -= ret;

		if (WARN_ONCE(ret != skb_len,
			      "'send_pkt()' returns %i, but %zu expected\n",
			      ret, skb_len))
			break;
	} while (rest_len);

	/* Hand back the credit we reserved but did not use */
	virtio_transport_put_credit(vvs, rest_len);

	/* Return number of bytes, if any data has been sent. */
	if (rest_len != pkt_len)
		ret = pkt_len - rest_len;

	return ret;
}
276
ae6fcfbf 277static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs,
07770616 278 u32 len)
06a8fc78 279{
07770616 280 if (vvs->rx_bytes + len > vvs->buf_alloc)
ae6fcfbf
SG
281 return false;
282
07770616 283 vvs->rx_bytes += len;
ae6fcfbf 284 return true;
06a8fc78
AH
285}
286
287static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs,
07770616 288 u32 len)
06a8fc78 289{
71dc9ec9
BE
290 vvs->rx_bytes -= len;
291 vvs->fwd_cnt += len;
06a8fc78
AH
292}
293
71dc9ec9 294void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct sk_buff *skb)
06a8fc78 295{
71dc9ec9
BE
296 struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
297
9632e9f6 298 spin_lock_bh(&vvs->rx_lock);
b89d882d 299 vvs->last_fwd_cnt = vvs->fwd_cnt;
71dc9ec9
BE
300 hdr->fwd_cnt = cpu_to_le32(vvs->fwd_cnt);
301 hdr->buf_alloc = cpu_to_le32(vvs->buf_alloc);
9632e9f6 302 spin_unlock_bh(&vvs->rx_lock);
06a8fc78
AH
303}
304EXPORT_SYMBOL_GPL(virtio_transport_inc_tx_pkt);
305
306u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 credit)
307{
308 u32 ret;
309
e3ec366e
AK
310 if (!credit)
311 return 0;
312
06a8fc78
AH
313 spin_lock_bh(&vvs->tx_lock);
314 ret = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt);
315 if (ret > credit)
316 ret = credit;
317 vvs->tx_cnt += ret;
318 spin_unlock_bh(&vvs->tx_lock);
319
320 return ret;
321}
322EXPORT_SYMBOL_GPL(virtio_transport_get_credit);
323
324void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit)
325{
e3ec366e
AK
326 if (!credit)
327 return;
328
06a8fc78
AH
329 spin_lock_bh(&vvs->tx_lock);
330 vvs->tx_cnt -= credit;
331 spin_unlock_bh(&vvs->tx_lock);
332}
333EXPORT_SYMBOL_GPL(virtio_transport_put_credit);
334
c10844c5 335static int virtio_transport_send_credit_update(struct vsock_sock *vsk)
06a8fc78
AH
336{
337 struct virtio_vsock_pkt_info info = {
338 .op = VIRTIO_VSOCK_OP_CREDIT_UPDATE,
36d277ba 339 .vsk = vsk,
06a8fc78
AH
340 };
341
342 return virtio_transport_send_pkt_info(vsk, &info);
343}
344
a786ab36
MEVL
345static ssize_t
346virtio_transport_stream_do_peek(struct vsock_sock *vsk,
347 struct msghdr *msg,
348 size_t len)
349{
350 struct virtio_vsock_sock *vvs = vsk->trans;
051e77e3
AK
351 struct sk_buff *skb;
352 size_t total = 0;
353 int err;
a786ab36
MEVL
354
355 spin_lock_bh(&vvs->rx_lock);
356
051e77e3
AK
357 skb_queue_walk(&vvs->rx_queue, skb) {
358 size_t bytes;
a786ab36 359
051e77e3
AK
360 bytes = len - total;
361 if (bytes > skb->len)
362 bytes = skb->len;
a786ab36 363
051e77e3 364 spin_unlock_bh(&vvs->rx_lock);
a786ab36 365
051e77e3 366 /* sk_lock is held by caller so no one else can dequeue.
0df7cd3c 367 * Unlock rx_lock since skb_copy_datagram_iter() may sleep.
051e77e3 368 */
0df7cd3c
AK
369 err = skb_copy_datagram_iter(skb, VIRTIO_VSOCK_SKB_CB(skb)->offset,
370 &msg->msg_iter, bytes);
051e77e3
AK
371 if (err)
372 goto out;
a786ab36 373
051e77e3 374 total += bytes;
a786ab36 375
051e77e3 376 spin_lock_bh(&vvs->rx_lock);
a786ab36 377
051e77e3
AK
378 if (total == len)
379 break;
a786ab36
MEVL
380 }
381
382 spin_unlock_bh(&vvs->rx_lock);
383
384 return total;
385
386out:
387 if (total)
388 err = total;
389 return err;
390}
391
06a8fc78
AH
/* Dequeue up to @len bytes of stream data into @msg, consuming it.
 * Partially-read skbs stay at the head of rx_queue with their progress
 * tracked in VIRTIO_VSOCK_SKB_CB(skb)->offset. Returns bytes copied, or
 * a negative errno if nothing was copied.
 */
static ssize_t
virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
				   struct msghdr *msg,
				   size_t len)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	size_t bytes, total = 0;
	struct sk_buff *skb;
	int err = -EFAULT;
	u32 free_space;

	spin_lock_bh(&vvs->rx_lock);

	/* Sanity check: accounting must agree with the queue contents */
	if (WARN_ONCE(skb_queue_empty(&vvs->rx_queue) && vvs->rx_bytes,
		      "rx_queue is empty, but rx_bytes is non-zero\n")) {
		spin_unlock_bh(&vvs->rx_lock);
		return err;
	}

	while (total < len && !skb_queue_empty(&vvs->rx_queue)) {
		skb = skb_peek(&vvs->rx_queue);

		/* Bound by the bytes remaining past the consumed offset */
		bytes = min_t(size_t, len - total,
			      skb->len - VIRTIO_VSOCK_SKB_CB(skb)->offset);

		/* sk_lock is held by caller so no one else can dequeue.
		 * Unlock rx_lock since skb_copy_datagram_iter() may sleep.
		 */
		spin_unlock_bh(&vvs->rx_lock);

		err = skb_copy_datagram_iter(skb,
					     VIRTIO_VSOCK_SKB_CB(skb)->offset,
					     &msg->msg_iter, bytes);
		if (err)
			goto out;

		spin_lock_bh(&vvs->rx_lock);

		total += bytes;

		VIRTIO_VSOCK_SKB_CB(skb)->offset += bytes;

		/* Fully consumed: release the skb and credit the peer */
		if (skb->len == VIRTIO_VSOCK_SKB_CB(skb)->offset) {
			u32 pkt_len = le32_to_cpu(virtio_vsock_hdr(skb)->len);

			virtio_transport_dec_rx_pkt(vvs, pkt_len);
			__skb_unlink(skb, &vvs->rx_queue);
			consume_skb(skb);
		}
	}

	free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs->last_fwd_cnt);

	spin_unlock_bh(&vvs->rx_lock);

	/* To reduce the number of credit update messages,
	 * don't update credits as long as lots of space is available.
	 * Note: the limit chosen here is arbitrary. Setting the limit
	 * too high causes extra messages. Too low causes transmitter
	 * stalls. As stalls are in theory more expensive than extra
	 * messages, we set the limit to a high value. TODO: experiment
	 * with different values.
	 */
	if (free_space < VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
		virtio_transport_send_credit_update(vsk);

	return total;

out:
	if (total)
		err = total;
	return err;
}
465
a75f501d
AK
/* Peek at the next complete SEQPACKET message without consuming it.
 * Copies as much as fits in @msg, walks to the EOM-marked fragment, sets
 * MSG_EOR if the sender did, and returns the full message length
 * (which may exceed the bytes copied). Returns 0 when no complete
 * message is queued.
 */
static ssize_t
virtio_transport_seqpacket_do_peek(struct vsock_sock *vsk,
				   struct msghdr *msg)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	struct sk_buff *skb;
	size_t total, len;

	spin_lock_bh(&vvs->rx_lock);

	if (!vvs->msg_count) {
		spin_unlock_bh(&vvs->rx_lock);
		return 0;
	}

	total = 0;
	len = msg_data_left(msg);

	skb_queue_walk(&vvs->rx_queue, skb) {
		struct virtio_vsock_hdr *hdr;

		if (total < len) {
			size_t bytes;
			int err;

			bytes = len - total;
			if (bytes > skb->len)
				bytes = skb->len;

			spin_unlock_bh(&vvs->rx_lock);

			/* sk_lock is held by caller so no one else can dequeue.
			 * Unlock rx_lock since skb_copy_datagram_iter() may sleep.
			 */
			err = skb_copy_datagram_iter(skb, VIRTIO_VSOCK_SKB_CB(skb)->offset,
						     &msg->msg_iter, bytes);
			if (err)
				return err;

			spin_lock_bh(&vvs->rx_lock);
		}

		/* Count the whole fragment toward the message length even
		 * when the user buffer is already full.
		 */
		total += skb->len;
		hdr = virtio_vsock_hdr(skb);

		if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM) {
			if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOR)
				msg->msg_flags |= MSG_EOR;

			break;
		}
	}

	spin_unlock_bh(&vvs->rx_lock);

	return total;
}
523
44931195
AK
/* Dequeue one complete SEQPACKET message into @msg. All fragments up to
 * and including the EOM-marked one are consumed; bytes that do not fit
 * in the user buffer are dropped (standard seqpacket truncation).
 * Returns the full message length, 0 if no complete message is queued,
 * or a negative errno if the copy failed.
 */
static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
						 struct msghdr *msg,
						 int flags)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	int dequeued_len = 0;
	size_t user_buf_len = msg_data_left(msg);
	bool msg_ready = false;
	struct sk_buff *skb;

	spin_lock_bh(&vvs->rx_lock);

	if (vvs->msg_count == 0) {
		spin_unlock_bh(&vvs->rx_lock);
		return 0;
	}

	while (!msg_ready) {
		struct virtio_vsock_hdr *hdr;
		size_t pkt_len;

		skb = __skb_dequeue(&vvs->rx_queue);
		if (!skb)
			break;
		hdr = virtio_vsock_hdr(skb);
		pkt_len = (size_t)le32_to_cpu(hdr->len);

		/* Once a copy has failed, dequeued_len holds the error and
		 * the remaining fragments are only drained, not copied.
		 */
		if (dequeued_len >= 0) {
			size_t bytes_to_copy;

			bytes_to_copy = min(user_buf_len, pkt_len);

			if (bytes_to_copy) {
				int err;

				/* sk_lock is held by caller so no one else can dequeue.
				 * Unlock rx_lock since skb_copy_datagram_iter() may sleep.
				 */
				spin_unlock_bh(&vvs->rx_lock);

				err = skb_copy_datagram_iter(skb, 0,
							     &msg->msg_iter,
							     bytes_to_copy);
				if (err) {
					/* Copy of message failed. Rest of
					 * fragments will be freed without copy.
					 */
					dequeued_len = err;
				} else {
					user_buf_len -= bytes_to_copy;
				}

				spin_lock_bh(&vvs->rx_lock);
			}

			if (dequeued_len >= 0)
				dequeued_len += pkt_len;
		}

		if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM) {
			msg_ready = true;
			vvs->msg_count--;

			if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOR)
				msg->msg_flags |= MSG_EOR;
		}

		/* Fragment consumed: credit the peer and free it */
		virtio_transport_dec_rx_pkt(vvs, pkt_len);
		kfree_skb(skb);
	}

	spin_unlock_bh(&vvs->rx_lock);

	virtio_transport_send_credit_update(vsk);

	return dequeued_len;
}
601
06a8fc78
AH
602ssize_t
603virtio_transport_stream_dequeue(struct vsock_sock *vsk,
604 struct msghdr *msg,
605 size_t len, int flags)
606{
607 if (flags & MSG_PEEK)
a786ab36
MEVL
608 return virtio_transport_stream_do_peek(vsk, msg, len);
609 else
610 return virtio_transport_stream_do_dequeue(vsk, msg, len);
06a8fc78
AH
611}
612EXPORT_SYMBOL_GPL(virtio_transport_stream_dequeue);
613
44931195
AK
614ssize_t
615virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk,
616 struct msghdr *msg,
617 int flags)
618{
619 if (flags & MSG_PEEK)
a75f501d
AK
620 return virtio_transport_seqpacket_do_peek(vsk, msg);
621 else
622 return virtio_transport_seqpacket_do_dequeue(vsk, msg, flags);
44931195
AK
623}
624EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue);
625
9ac841f5
AK
626int
627virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk,
628 struct msghdr *msg,
629 size_t len)
630{
631 struct virtio_vsock_sock *vvs = vsk->trans;
632
633 spin_lock_bh(&vvs->tx_lock);
634
635 if (len > vvs->peer_buf_alloc) {
636 spin_unlock_bh(&vvs->tx_lock);
637 return -EMSGSIZE;
638 }
639
640 spin_unlock_bh(&vvs->tx_lock);
641
642 return virtio_transport_stream_enqueue(vsk, msg, len);
643}
644EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_enqueue);
645
06a8fc78
AH
/* Datagram sockets are not supported by the virtio transport */
int
virtio_transport_dgram_dequeue(struct vsock_sock *vsk,
			       struct msghdr *msg,
			       size_t len, int flags)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue);
654
655s64 virtio_transport_stream_has_data(struct vsock_sock *vsk)
656{
657 struct virtio_vsock_sock *vvs = vsk->trans;
658 s64 bytes;
659
660 spin_lock_bh(&vvs->rx_lock);
661 bytes = vvs->rx_bytes;
662 spin_unlock_bh(&vvs->rx_lock);
663
664 return bytes;
665}
666EXPORT_SYMBOL_GPL(virtio_transport_stream_has_data);
667
9ac841f5
AK
668u32 virtio_transport_seqpacket_has_data(struct vsock_sock *vsk)
669{
670 struct virtio_vsock_sock *vvs = vsk->trans;
671 u32 msg_count;
672
673 spin_lock_bh(&vvs->rx_lock);
674 msg_count = vvs->msg_count;
675 spin_unlock_bh(&vvs->rx_lock);
676
677 return msg_count;
678}
679EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_has_data);
680
06a8fc78
AH
681static s64 virtio_transport_has_space(struct vsock_sock *vsk)
682{
683 struct virtio_vsock_sock *vvs = vsk->trans;
684 s64 bytes;
685
686 bytes = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt);
687 if (bytes < 0)
688 bytes = 0;
689
690 return bytes;
691}
692
693s64 virtio_transport_stream_has_space(struct vsock_sock *vsk)
694{
695 struct virtio_vsock_sock *vvs = vsk->trans;
696 s64 bytes;
697
698 spin_lock_bh(&vvs->tx_lock);
699 bytes = virtio_transport_has_space(vsk);
700 spin_unlock_bh(&vvs->tx_lock);
701
702 return bytes;
703}
704EXPORT_SYMBOL_GPL(virtio_transport_stream_has_space);
705
706int virtio_transport_do_socket_init(struct vsock_sock *vsk,
707 struct vsock_sock *psk)
708{
709 struct virtio_vsock_sock *vvs;
710
711 vvs = kzalloc(sizeof(*vvs), GFP_KERNEL);
712 if (!vvs)
713 return -ENOMEM;
714
715 vsk->trans = vvs;
716 vvs->vsk = vsk;
c0cfa2d8 717 if (psk && psk->trans) {
06a8fc78
AH
718 struct virtio_vsock_sock *ptrans = psk->trans;
719
06a8fc78 720 vvs->peer_buf_alloc = ptrans->peer_buf_alloc;
06a8fc78
AH
721 }
722
b9f2b0ff
SG
723 if (vsk->buffer_size > VIRTIO_VSOCK_MAX_BUF_SIZE)
724 vsk->buffer_size = VIRTIO_VSOCK_MAX_BUF_SIZE;
725
726 vvs->buf_alloc = vsk->buffer_size;
06a8fc78
AH
727
728 spin_lock_init(&vvs->rx_lock);
729 spin_lock_init(&vvs->tx_lock);
71dc9ec9 730 skb_queue_head_init(&vvs->rx_queue);
06a8fc78
AH
731
732 return 0;
733}
734EXPORT_SYMBOL_GPL(virtio_transport_do_socket_init);
735
b9f2b0ff
SG
736/* sk_lock held by the caller */
737void virtio_transport_notify_buffer_size(struct vsock_sock *vsk, u64 *val)
06a8fc78
AH
738{
739 struct virtio_vsock_sock *vvs = vsk->trans;
740
b9f2b0ff
SG
741 if (*val > VIRTIO_VSOCK_MAX_BUF_SIZE)
742 *val = VIRTIO_VSOCK_MAX_BUF_SIZE;
06a8fc78 743
b9f2b0ff 744 vvs->buf_alloc = *val;
ec3359b6 745
c10844c5 746 virtio_transport_send_credit_update(vsk);
06a8fc78 747}
b9f2b0ff 748EXPORT_SYMBOL_GPL(virtio_transport_notify_buffer_size);
06a8fc78
AH
749
/* Poll hook: readable once at least @target bytes are queued */
int
virtio_transport_notify_poll_in(struct vsock_sock *vsk,
				size_t target,
				bool *data_ready_now)
{
	*data_ready_now = vsock_stream_has_data(vsk) >= target;

	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_in);
760
761int
762virtio_transport_notify_poll_out(struct vsock_sock *vsk,
763 size_t target,
764 bool *space_avail_now)
765{
766 s64 free_space;
767
768 free_space = vsock_stream_has_space(vsk);
769 if (free_space > 0)
770 *space_avail_now = true;
771 else if (free_space == 0)
772 *space_avail_now = false;
773
774 return 0;
775}
776EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_out);
777
/* No-op notify hook: the virtio transport keeps no per-receive state */
int virtio_transport_notify_recv_init(struct vsock_sock *vsk,
	size_t target, struct vsock_transport_recv_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_init);
784
/* No-op notify hook: nothing to do before blocking on receive */
int virtio_transport_notify_recv_pre_block(struct vsock_sock *vsk,
	size_t target, struct vsock_transport_recv_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_block);
791
/* No-op notify hook: nothing to do before dequeueing */
int virtio_transport_notify_recv_pre_dequeue(struct vsock_sock *vsk,
	size_t target, struct vsock_transport_recv_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_dequeue);
798
/* No-op notify hook: nothing to do after dequeueing */
int virtio_transport_notify_recv_post_dequeue(struct vsock_sock *vsk,
	size_t target, ssize_t copied, bool data_read,
	struct vsock_transport_recv_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_post_dequeue);
806
/* No-op notify hook: the virtio transport keeps no per-send state */
int virtio_transport_notify_send_init(struct vsock_sock *vsk,
	struct vsock_transport_send_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_send_init);
813
/* No-op notify hook: nothing to do before blocking on send */
int virtio_transport_notify_send_pre_block(struct vsock_sock *vsk,
	struct vsock_transport_send_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_block);
820
/* No-op notify hook: nothing to do before enqueueing */
int virtio_transport_notify_send_pre_enqueue(struct vsock_sock *vsk,
	struct vsock_transport_send_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_enqueue);
827
/* No-op notify hook: nothing to do after enqueueing */
int virtio_transport_notify_send_post_enqueue(struct vsock_sock *vsk,
	ssize_t written, struct vsock_transport_send_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_send_post_enqueue);
834
/* Receive high-watermark: the socket's configured buffer size */
u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk)
{
	return vsk->buffer_size;
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_rcvhiwat);
840
/* Virtio stream sockets are always considered active */
bool virtio_transport_stream_is_active(struct vsock_sock *vsk)
{
	return true;
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_is_active);
846
/* Stream connections are allowed to/from any cid:port */
bool virtio_transport_stream_allow(u32 cid, u32 port)
{
	return true;
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_allow);
852
/* Datagram sockets are not supported by the virtio transport */
int virtio_transport_dgram_bind(struct vsock_sock *vsk,
				struct sockaddr_vm *addr)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind);
859
/* Datagram sockets are not supported by the virtio transport */
bool virtio_transport_dgram_allow(u32 cid, u32 port)
{
	return false;
}
EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow);
865
866int virtio_transport_connect(struct vsock_sock *vsk)
867{
868 struct virtio_vsock_pkt_info info = {
869 .op = VIRTIO_VSOCK_OP_REQUEST,
36d277ba 870 .vsk = vsk,
06a8fc78
AH
871 };
872
873 return virtio_transport_send_pkt_info(vsk, &info);
874}
875EXPORT_SYMBOL_GPL(virtio_transport_connect);
876
877int virtio_transport_shutdown(struct vsock_sock *vsk, int mode)
878{
879 struct virtio_vsock_pkt_info info = {
880 .op = VIRTIO_VSOCK_OP_SHUTDOWN,
06a8fc78
AH
881 .flags = (mode & RCV_SHUTDOWN ?
882 VIRTIO_VSOCK_SHUTDOWN_RCV : 0) |
883 (mode & SEND_SHUTDOWN ?
884 VIRTIO_VSOCK_SHUTDOWN_SEND : 0),
36d277ba 885 .vsk = vsk,
06a8fc78
AH
886 };
887
888 return virtio_transport_send_pkt_info(vsk, &info);
889}
890EXPORT_SYMBOL_GPL(virtio_transport_shutdown);
891
/* Datagram sockets are not supported by the virtio transport */
int
virtio_transport_dgram_enqueue(struct vsock_sock *vsk,
			       struct sockaddr_vm *remote_addr,
			       struct msghdr *msg,
			       size_t dgram_len)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue);
901
902ssize_t
903virtio_transport_stream_enqueue(struct vsock_sock *vsk,
904 struct msghdr *msg,
905 size_t len)
906{
907 struct virtio_vsock_pkt_info info = {
908 .op = VIRTIO_VSOCK_OP_RW,
06a8fc78
AH
909 .msg = msg,
910 .pkt_len = len,
36d277ba 911 .vsk = vsk,
06a8fc78
AH
912 };
913
914 return virtio_transport_send_pkt_info(vsk, &info);
915}
916EXPORT_SYMBOL_GPL(virtio_transport_stream_enqueue);
917
918void virtio_transport_destruct(struct vsock_sock *vsk)
919{
920 struct virtio_vsock_sock *vvs = vsk->trans;
921
922 kfree(vvs);
923}
924EXPORT_SYMBOL_GPL(virtio_transport_destruct);
925
926static int virtio_transport_reset(struct vsock_sock *vsk,
71dc9ec9 927 struct sk_buff *skb)
06a8fc78
AH
928{
929 struct virtio_vsock_pkt_info info = {
930 .op = VIRTIO_VSOCK_OP_RST,
71dc9ec9 931 .reply = !!skb,
36d277ba 932 .vsk = vsk,
06a8fc78
AH
933 };
934
935 /* Send RST only if the original pkt is not a RST pkt */
71dc9ec9 936 if (skb && le16_to_cpu(virtio_vsock_hdr(skb)->op) == VIRTIO_VSOCK_OP_RST)
06a8fc78
AH
937 return 0;
938
939 return virtio_transport_send_pkt_info(vsk, &info);
940}
941
/* Normally packets are associated with a socket. There may be no socket if an
 * attempt was made to connect to a socket that does not exist.
 *
 * Builds a reply RST addressed back to the sender of @skb (note the
 * swapped src/dst) and submits it via @t->send_pkt().
 */
static int virtio_transport_reset_no_sock(const struct virtio_transport *t,
					  struct sk_buff *skb)
{
	struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
	struct virtio_vsock_pkt_info info = {
		.op = VIRTIO_VSOCK_OP_RST,
		.type = le16_to_cpu(hdr->type),
		.reply = true,
	};
	struct sk_buff *reply;

	/* Send RST only if the original pkt is not a RST pkt */
	if (le16_to_cpu(hdr->op) == VIRTIO_VSOCK_OP_RST)
		return 0;

	if (!t)
		return -ENOTCONN;

	reply = virtio_transport_alloc_skb(&info, 0,
					   le64_to_cpu(hdr->dst_cid),
					   le32_to_cpu(hdr->dst_port),
					   le64_to_cpu(hdr->src_cid),
					   le32_to_cpu(hdr->src_port));
	if (!reply)
		return -ENOMEM;

	return t->send_pkt(reply);
}
973
8432b811
SG
/* This function should be called with sk_lock held and SOCK_DONE set */
static void virtio_transport_remove_sock(struct vsock_sock *vsk)
{
	struct virtio_vsock_sock *vvs = vsk->trans;

	/* We don't need to take rx_lock, as the socket is closing and we are
	 * removing it.
	 */
	__skb_queue_purge(&vvs->rx_queue);
	vsock_remove_sock(vsk);
}
985
06a8fc78
AH
/* Block (interruptibly, up to @timeout jiffies) waiting for the graceful
 * close handshake to finish, i.e. for SOCK_DONE to be set.
 */
static void virtio_transport_wait_close(struct sock *sk, long timeout)
{
	if (timeout) {
		DEFINE_WAIT_FUNC(wait, woken_wake_function);

		add_wait_queue(sk_sleep(sk), &wait);

		do {
			if (sk_wait_event(sk, &timeout,
					  sock_flag(sk, SOCK_DONE), &wait))
				break;
		} while (!signal_pending(current) && timeout);

		remove_wait_queue(sk_sleep(sk), &wait);
	}
}
1002
/* Finish closing @vsk: mark it done, wake waiters, and — if the delayed
 * close timeout was armed — cancel it (when @cancel_timeout) and remove
 * the socket.
 */
static void virtio_transport_do_close(struct vsock_sock *vsk,
				      bool cancel_timeout)
{
	struct sock *sk = sk_vsock(vsk);

	sock_set_flag(sk, SOCK_DONE);
	vsk->peer_shutdown = SHUTDOWN_MASK;
	/* Keep the socket in TCP_CLOSING only while unread data remains */
	if (vsock_stream_has_data(vsk) <= 0)
		sk->sk_state = TCP_CLOSING;
	sk->sk_state_change(sk);

	if (vsk->close_work_scheduled &&
	    (!cancel_timeout || cancel_delayed_work(&vsk->close_work))) {
		vsk->close_work_scheduled = false;

		virtio_transport_remove_sock(vsk);

		/* Release refcnt obtained when we scheduled the timeout */
		sock_put(sk);
	}
}
1024
/* Delayed-work handler: the peer did not complete the graceful shutdown
 * within VSOCK_CLOSE_TIMEOUT, so reset the connection and force-close.
 */
static void virtio_transport_close_timeout(struct work_struct *work)
{
	struct vsock_sock *vsk =
		container_of(work, struct vsock_sock, close_work.work);
	struct sock *sk = sk_vsock(vsk);

	sock_hold(sk);
	lock_sock(sk);

	if (!sock_flag(sk, SOCK_DONE)) {
		(void)virtio_transport_reset(vsk, NULL);

		virtio_transport_do_close(vsk, false);
	}

	vsk->close_work_scheduled = false;

	release_sock(sk);
	sock_put(sk);
}
1045
/* User context, vsk->sk is locked.
 *
 * Start closing the connection. Returns true when the socket can be
 * removed immediately; false when a graceful-shutdown timeout was armed
 * and teardown will finish later (in do_close or the timeout worker).
 */
static bool virtio_transport_close(struct vsock_sock *vsk)
{
	struct sock *sk = &vsk->sk;

	if (!(sk->sk_state == TCP_ESTABLISHED ||
	      sk->sk_state == TCP_CLOSING))
		return true;

	/* Already received SHUTDOWN from peer, reply with RST */
	if ((vsk->peer_shutdown & SHUTDOWN_MASK) == SHUTDOWN_MASK) {
		(void)virtio_transport_reset(vsk, NULL);
		return true;
	}

	if ((sk->sk_shutdown & SHUTDOWN_MASK) != SHUTDOWN_MASK)
		(void)virtio_transport_shutdown(vsk, SHUTDOWN_MASK);

	/* SO_LINGER: wait (bounded) for the peer to acknowledge the close */
	if (sock_flag(sk, SOCK_LINGER) && !(current->flags & PF_EXITING))
		virtio_transport_wait_close(sk, sk->sk_lingertime);

	if (sock_flag(sk, SOCK_DONE)) {
		return true;
	}

	/* Peer has not finished the handshake yet: hold a reference and
	 * give it VSOCK_CLOSE_TIMEOUT before forcing the close.
	 */
	sock_hold(sk);
	INIT_DELAYED_WORK(&vsk->close_work,
			  virtio_transport_close_timeout);
	vsk->close_work_scheduled = true;
	schedule_delayed_work(&vsk->close_work, VSOCK_CLOSE_TIMEOUT);
	return false;
}
1078
/* Release hook: initiate (and, when possible, complete) socket teardown.
 * Connection-oriented sockets may defer removal to the close timeout.
 */
void virtio_transport_release(struct vsock_sock *vsk)
{
	struct sock *sk = &vsk->sk;
	bool remove_sock = true;

	if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)
		remove_sock = virtio_transport_close(vsk);

	if (remove_sock) {
		sock_set_flag(sk, SOCK_DONE);
		virtio_transport_remove_sock(vsk);
	}
}
EXPORT_SYMBOL_GPL(virtio_transport_release);
1093
/* Handle a packet received while @sk is in the connecting state:
 * OP_RESPONSE completes the handshake, OP_RST aborts with ECONNRESET,
 * and anything else (except OP_INVALID, which is ignored) is a protocol
 * error that resets the connection.
 */
static int
virtio_transport_recv_connecting(struct sock *sk,
				 struct sk_buff *skb)
{
	struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
	struct vsock_sock *vsk = vsock_sk(sk);
	int skerr;
	int err;

	switch (le16_to_cpu(hdr->op)) {
	case VIRTIO_VSOCK_OP_RESPONSE:
		sk->sk_state = TCP_ESTABLISHED;
		sk->sk_socket->state = SS_CONNECTED;
		vsock_insert_connected(vsk);
		sk->sk_state_change(sk);
		break;
	case VIRTIO_VSOCK_OP_INVALID:
		break;
	case VIRTIO_VSOCK_OP_RST:
		skerr = ECONNRESET;
		err = 0;
		goto destroy;
	default:
		skerr = EPROTO;
		err = -EINVAL;
		goto destroy;
	}
	return 0;

destroy:
	virtio_transport_reset(vsk, skb);
	sk->sk_state = TCP_CLOSE;
	sk->sk_err = skerr;
	sk_error_report(sk);
	return err;
}
1130
/* Queue a received RW packet on the socket's rx_queue, charging it
 * against the receive buffer space (rx credit). If there is no space
 * the skb is dropped and freed. Small payloads are merged into the
 * tail skb of the queue when possible to save memory.
 */
static void
virtio_transport_recv_enqueue(struct vsock_sock *vsk,
			      struct sk_buff *skb)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	bool can_enqueue, free_pkt = false;
	struct virtio_vsock_hdr *hdr;
	u32 len;

	hdr = virtio_vsock_hdr(skb);
	len = le32_to_cpu(hdr->len);

	spin_lock_bh(&vvs->rx_lock);

	/* Account the payload against rx buffer space; drop on overflow. */
	can_enqueue = virtio_transport_inc_rx_pkt(vvs, len);
	if (!can_enqueue) {
		free_pkt = true;
		goto out;
	}

	/* EOM marks the end of a SEQPACKET message. */
	if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM)
		vvs->msg_count++;

	/* Try to copy small packets into the buffer of last packet queued,
	 * to avoid wasting memory queueing the entire buffer with a small
	 * payload.
	 */
	if (len <= GOOD_COPY_LEN && !skb_queue_empty(&vvs->rx_queue)) {
		struct virtio_vsock_hdr *last_hdr;
		struct sk_buff *last_skb;

		last_skb = skb_peek_tail(&vvs->rx_queue);
		last_hdr = virtio_vsock_hdr(last_skb);

		/* If there is space in the last packet queued, we copy the
		 * new packet in its buffer. We avoid this if the last packet
		 * queued has VIRTIO_VSOCK_SEQ_EOM set, because this is
		 * delimiter of SEQPACKET message, so 'pkt' is the first packet
		 * of a new message.
		 */
		if (skb->len < skb_tailroom(last_skb) &&
		    !(le32_to_cpu(last_hdr->flags) & VIRTIO_VSOCK_SEQ_EOM)) {
			memcpy(skb_put(last_skb, skb->len), skb->data, skb->len);
			free_pkt = true;
			/* Merge flags and length into the tail skb's header. */
			last_hdr->flags |= hdr->flags;
			le32_add_cpu(&last_hdr->len, len);
			goto out;
		}
	}

	__skb_queue_tail(&vvs->rx_queue, skb);

out:
	spin_unlock_bh(&vvs->rx_lock);
	/* Freed outside the lock: either dropped or merged into the tail. */
	if (free_pkt)
		kfree_skb(skb);
}
1188
/* Handle a packet received while the socket is in TCP_ESTABLISHED.
 *
 * skb ownership: a RW packet is consumed by the rx queue (via
 * virtio_transport_recv_enqueue) and the function returns early;
 * every other op frees the skb here before returning.
 */
static int
virtio_transport_recv_connected(struct sock *sk,
				struct sk_buff *skb)
{
	struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
	struct vsock_sock *vsk = vsock_sk(sk);
	int err = 0;

	switch (le16_to_cpu(hdr->op)) {
	case VIRTIO_VSOCK_OP_RW:
		/* Data: enqueue (takes over the skb) and wake readers. */
		virtio_transport_recv_enqueue(vsk, skb);
		vsock_data_ready(sk);
		return err;
	case VIRTIO_VSOCK_OP_CREDIT_REQUEST:
		/* Peer asked for our current credit state. */
		virtio_transport_send_credit_update(vsk);
		break;
	case VIRTIO_VSOCK_OP_CREDIT_UPDATE:
		/* Peer credit was refreshed by the caller's space update;
		 * wake any writers blocked on buffer space.
		 */
		sk->sk_write_space(sk);
		break;
	case VIRTIO_VSOCK_OP_SHUTDOWN:
		if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SHUTDOWN_RCV)
			vsk->peer_shutdown |= RCV_SHUTDOWN;
		if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SHUTDOWN_SEND)
			vsk->peer_shutdown |= SEND_SHUTDOWN;
		/* Peer fully shut down and no pending data to read:
		 * reset and close the connection.
		 */
		if (vsk->peer_shutdown == SHUTDOWN_MASK &&
		    vsock_stream_has_data(vsk) <= 0 &&
		    !sock_flag(sk, SOCK_DONE)) {
			(void)virtio_transport_reset(vsk, NULL);
			virtio_transport_do_close(vsk, true);
		}
		if (le32_to_cpu(virtio_vsock_hdr(skb)->flags))
			sk->sk_state_change(sk);
		break;
	case VIRTIO_VSOCK_OP_RST:
		virtio_transport_do_close(vsk, true);
		break;
	default:
		err = -EINVAL;
		break;
	}

	kfree_skb(skb);
	return err;
}
1233
1234static void
1235virtio_transport_recv_disconnecting(struct sock *sk,
71dc9ec9 1236 struct sk_buff *skb)
06a8fc78 1237{
71dc9ec9 1238 struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
06a8fc78
AH
1239 struct vsock_sock *vsk = vsock_sk(sk);
1240
71dc9ec9 1241 if (le16_to_cpu(hdr->op) == VIRTIO_VSOCK_OP_RST)
06a8fc78
AH
1242 virtio_transport_do_close(vsk, true);
1243}
1244
1245static int
1246virtio_transport_send_response(struct vsock_sock *vsk,
71dc9ec9 1247 struct sk_buff *skb)
06a8fc78 1248{
71dc9ec9 1249 struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
06a8fc78
AH
1250 struct virtio_vsock_pkt_info info = {
1251 .op = VIRTIO_VSOCK_OP_RESPONSE,
71dc9ec9
BE
1252 .remote_cid = le64_to_cpu(hdr->src_cid),
1253 .remote_port = le32_to_cpu(hdr->src_port),
06a8fc78 1254 .reply = true,
36d277ba 1255 .vsk = vsk,
06a8fc78
AH
1256 };
1257
1258 return virtio_transport_send_pkt_info(vsk, &info);
1259}
1260
c0cfa2d8 1261static bool virtio_transport_space_update(struct sock *sk,
71dc9ec9 1262 struct sk_buff *skb)
c0cfa2d8 1263{
71dc9ec9 1264 struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
c0cfa2d8
SG
1265 struct vsock_sock *vsk = vsock_sk(sk);
1266 struct virtio_vsock_sock *vvs = vsk->trans;
1267 bool space_available;
1268
1269 /* Listener sockets are not associated with any transport, so we are
1270 * not able to take the state to see if there is space available in the
1271 * remote peer, but since they are only used to receive requests, we
1272 * can assume that there is always space available in the other peer.
1273 */
1274 if (!vvs)
1275 return true;
1276
1277 /* buf_alloc and fwd_cnt is always included in the hdr */
1278 spin_lock_bh(&vvs->tx_lock);
71dc9ec9
BE
1279 vvs->peer_buf_alloc = le32_to_cpu(hdr->buf_alloc);
1280 vvs->peer_fwd_cnt = le32_to_cpu(hdr->fwd_cnt);
c0cfa2d8
SG
1281 space_available = virtio_transport_has_space(vsk);
1282 spin_unlock_bh(&vvs->tx_lock);
1283 return space_available;
1284}
1285
/* Handle server socket: a packet arrived for a listening socket.
 *
 * Only VIRTIO_VSOCK_OP_REQUEST is valid here; on success a connected
 * child socket is created, queued on the listener's accept queue and a
 * RESPONSE is sent to the peer. Any failure replies with RST.
 * The caller retains ownership of @skb and frees it afterwards.
 */
static int
virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb,
			     struct virtio_transport *t)
{
	struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
	struct vsock_sock *vsk = vsock_sk(sk);
	struct vsock_sock *vchild;
	struct sock *child;
	int ret;

	if (le16_to_cpu(hdr->op) != VIRTIO_VSOCK_OP_REQUEST) {
		virtio_transport_reset_no_sock(t, skb);
		return -EINVAL;
	}

	/* Refuse the connection if the accept backlog is full. */
	if (sk_acceptq_is_full(sk)) {
		virtio_transport_reset_no_sock(t, skb);
		return -ENOMEM;
	}

	child = vsock_create_connected(sk);
	if (!child) {
		virtio_transport_reset_no_sock(t, skb);
		return -ENOMEM;
	}

	sk_acceptq_added(sk);

	lock_sock_nested(child, SINGLE_DEPTH_NESTING);

	child->sk_state = TCP_ESTABLISHED;

	/* Child addresses mirror the request: our side from dst_*,
	 * the peer from src_*.
	 */
	vchild = vsock_sk(child);
	vsock_addr_init(&vchild->local_addr, le64_to_cpu(hdr->dst_cid),
			le32_to_cpu(hdr->dst_port));
	vsock_addr_init(&vchild->remote_addr, le64_to_cpu(hdr->src_cid),
			le32_to_cpu(hdr->src_port));

	ret = vsock_assign_transport(vchild, vsk);
	/* Transport assigned (looking at remote_addr) must be the same
	 * where we received the request.
	 */
	if (ret || vchild->transport != &t->transport) {
		release_sock(child);
		virtio_transport_reset_no_sock(t, skb);
		sock_put(child);
		return ret;
	}

	if (virtio_transport_space_update(child, skb))
		child->sk_write_space(child);

	vsock_insert_connected(vchild);
	vsock_enqueue_accept(sk, child);
	virtio_transport_send_response(vchild, skb);

	release_sock(child);

	/* Wake the listener so accept() can pick up the child. */
	sk->sk_data_ready(sk);
	return 0;
}
1348
e4b1ef15
AK
1349static bool virtio_transport_valid_type(u16 type)
1350{
1351 return (type == VIRTIO_VSOCK_TYPE_STREAM) ||
1352 (type == VIRTIO_VSOCK_TYPE_SEQPACKET);
1353}
1354
/* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex
 * lock.
 *
 * Main receive entry point: validate the packet, find the destination
 * socket, refresh the peer credit state and dispatch the packet by
 * socket state. Consumes @skb on every path (either handed off to a
 * state handler that keeps/frees it, or freed here).
 */
void virtio_transport_recv_pkt(struct virtio_transport *t,
			       struct sk_buff *skb)
{
	struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
	struct sockaddr_vm src, dst;
	struct vsock_sock *vsk;
	struct sock *sk;
	bool space_available;

	vsock_addr_init(&src, le64_to_cpu(hdr->src_cid),
			le32_to_cpu(hdr->src_port));
	vsock_addr_init(&dst, le64_to_cpu(hdr->dst_cid),
			le32_to_cpu(hdr->dst_port));

	trace_virtio_transport_recv_pkt(src.svm_cid, src.svm_port,
					dst.svm_cid, dst.svm_port,
					le32_to_cpu(hdr->len),
					le16_to_cpu(hdr->type),
					le16_to_cpu(hdr->op),
					le32_to_cpu(hdr->flags),
					le32_to_cpu(hdr->buf_alloc),
					le32_to_cpu(hdr->fwd_cnt));

	if (!virtio_transport_valid_type(le16_to_cpu(hdr->type))) {
		(void)virtio_transport_reset_no_sock(t, skb);
		goto free_pkt;
	}

	/* The socket must be in connected or bound table
	 * otherwise send reset back
	 */
	sk = vsock_find_connected_socket(&src, &dst);
	if (!sk) {
		sk = vsock_find_bound_socket(&dst);
		if (!sk) {
			(void)virtio_transport_reset_no_sock(t, skb);
			goto free_pkt;
		}
	}

	/* Socket type (stream/seqpacket) must match the packet type. */
	if (virtio_transport_get_type(sk) != le16_to_cpu(hdr->type)) {
		(void)virtio_transport_reset_no_sock(t, skb);
		sock_put(sk);
		goto free_pkt;
	}

	/* NOTE(review): this failure path jumps to free_pkt without the
	 * sock_put() that the other post-lookup error paths perform. It
	 * should be unreachable (vsock_find_* above took a reference, so
	 * sk_refcnt cannot be zero here), but confirm against upstream
	 * whether a sock_put() is intended on this path.
	 */
	if (!skb_set_owner_sk_safe(skb, sk)) {
		WARN_ONCE(1, "receiving vsock socket has sk_refcnt == 0\n");
		goto free_pkt;
	}

	vsk = vsock_sk(sk);

	lock_sock(sk);

	/* Check if sk has been closed before lock_sock */
	if (sock_flag(sk, SOCK_DONE)) {
		(void)virtio_transport_reset_no_sock(t, skb);
		release_sock(sk);
		sock_put(sk);
		goto free_pkt;
	}

	/* Every header carries the peer's credit state; import it. */
	space_available = virtio_transport_space_update(sk, skb);

	/* Update CID in case it has changed after a transport reset event */
	if (vsk->local_addr.svm_cid != VMADDR_CID_ANY)
		vsk->local_addr.svm_cid = dst.svm_cid;

	if (space_available)
		sk->sk_write_space(sk);

	/* Dispatch by connection state; TCP_ESTABLISHED hands skb
	 * ownership to the handler, all other cases free it here.
	 */
	switch (sk->sk_state) {
	case TCP_LISTEN:
		virtio_transport_recv_listen(sk, skb, t);
		kfree_skb(skb);
		break;
	case TCP_SYN_SENT:
		virtio_transport_recv_connecting(sk, skb);
		kfree_skb(skb);
		break;
	case TCP_ESTABLISHED:
		virtio_transport_recv_connected(sk, skb);
		break;
	case TCP_CLOSING:
		virtio_transport_recv_disconnecting(sk, skb);
		kfree_skb(skb);
		break;
	default:
		(void)virtio_transport_reset_no_sock(t, skb);
		kfree_skb(skb);
		break;
	}

	release_sock(sk);

	/* Release refcnt obtained when we fetched this socket out of the
	 * bound or connected list.
	 */
	sock_put(sk);
	return;

free_pkt:
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(virtio_transport_recv_pkt);
1464
71dc9ec9
BE
1465/* Remove skbs found in a queue that have a vsk that matches.
1466 *
1467 * Each skb is freed.
1468 *
1469 * Returns the count of skbs that were reply packets.
1470 */
1471int virtio_transport_purge_skbs(void *vsk, struct sk_buff_head *queue)
06a8fc78 1472{
71dc9ec9
BE
1473 struct sk_buff_head freeme;
1474 struct sk_buff *skb, *tmp;
1475 int cnt = 0;
1476
1477 skb_queue_head_init(&freeme);
1478
1479 spin_lock_bh(&queue->lock);
1480 skb_queue_walk_safe(queue, skb, tmp) {
1481 if (vsock_sk(skb->sk) != vsk)
1482 continue;
1483
1484 __skb_unlink(skb, queue);
1485 __skb_queue_tail(&freeme, skb);
1486
1487 if (virtio_vsock_skb_reply(skb))
1488 cnt++;
1489 }
1490 spin_unlock_bh(&queue->lock);
1491
1492 __skb_queue_purge(&freeme);
1493
1494 return cnt;
06a8fc78 1495}
71dc9ec9 1496EXPORT_SYMBOL_GPL(virtio_transport_purge_skbs);
06a8fc78 1497
/* Pop one skb from the socket's rx_queue and feed it to @recv_actor.
 *
 * Returns the actor's return value, or a negative errno from
 * __skb_recv_datagram() when no skb is available.
 * Ownership of the skb passes to @recv_actor.
 */
int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_actor)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	struct sock *sk = sk_vsock(vsk);
	struct sk_buff *skb;
	int off = 0;
	int err;

	spin_lock_bh(&vvs->rx_lock);
	/* Use __skb_recv_datagram() for race-free handling of the receive. It
	 * works for types other than dgrams.
	 */
	skb = __skb_recv_datagram(sk, &vvs->rx_queue, MSG_DONTWAIT, &off, &err);
	spin_unlock_bh(&vvs->rx_lock);

	if (!skb)
		return err;

	return recv_actor(sk, skb);
}
EXPORT_SYMBOL_GPL(virtio_transport_read_skb);
1519
/* Module metadata for the shared virtio vsock transport helpers. */
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Asias He");
MODULE_DESCRIPTION("common code for virtio vsock");