Merge tag 'perf-tools-fixes-for-v6.4-1-2023-05-20' of git://git.kernel.org/pub/scm...
[linux-block.git] / net / sunrpc / svcsock.c
CommitLineData
457c8996 1// SPDX-License-Identifier: GPL-2.0-only
1da177e4
LT
2/*
3 * linux/net/sunrpc/svcsock.c
4 *
5 * These are the RPC server socket internals.
6 *
7 * The server scheduling algorithm does not always distribute the load
8 * evenly when servicing a single client. May need to modify the
f6150c3c 9 * svc_xprt_enqueue procedure...
1da177e4
LT
10 *
11 * TCP support is largely untested and may be a little slow. The problem
12 * is that we currently do two separate recvfrom's, one for the 4-byte
13 * record length, and the second for the actual record. This could possibly
14 * be improved by always reading a minimum size of around 100 bytes and
15 * tucking any superfluous bytes away in a temporary store. Still, that
16 * leaves write requests out in the rain. An alternative may be to peek at
17 * the first skb in the queue, and if it matches the next TCP sequence
18 * number, to extract the record marker. Yuck.
19 *
20 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
21 */
22
172589cc 23#include <linux/kernel.h>
1da177e4 24#include <linux/sched.h>
3a9a231d 25#include <linux/module.h>
1da177e4
LT
26#include <linux/errno.h>
27#include <linux/fcntl.h>
28#include <linux/net.h>
29#include <linux/in.h>
30#include <linux/inet.h>
31#include <linux/udp.h>
91483c4b 32#include <linux/tcp.h>
1da177e4
LT
33#include <linux/unistd.h>
34#include <linux/slab.h>
35#include <linux/netdevice.h>
36#include <linux/skbuff.h>
b41b66d6 37#include <linux/file.h>
7dfb7103 38#include <linux/freezer.h>
1da177e4
LT
39#include <net/sock.h>
40#include <net/checksum.h>
41#include <net/ip.h>
b92503b2 42#include <net/ipv6.h>
850cbadd 43#include <net/udp.h>
b7872fe8 44#include <net/tcp.h>
c752f073 45#include <net/tcp_states.h>
5e052dda 46#include <net/tls.h>
b3cbf98e 47#include <net/handshake.h>
7c0f6ba6 48#include <linux/uaccess.h>
becd2014 49#include <linux/highmem.h>
1da177e4 50#include <asm/ioctls.h>
b3cbf98e 51#include <linux/key.h>
1da177e4
LT
52
53#include <linux/sunrpc/types.h>
ad06e4bd 54#include <linux/sunrpc/clnt.h>
1da177e4 55#include <linux/sunrpc/xdr.h>
c0401ea0 56#include <linux/sunrpc/msg_prot.h>
1da177e4
LT
57#include <linux/sunrpc/svcsock.h>
58#include <linux/sunrpc/stats.h>
4cfc7e60 59#include <linux/sunrpc/xprt.h>
1da177e4 60
40e0b090 61#include <trace/events/sock.h>
998024de
CL
62#include <trace/events/sunrpc.h>
63
9e55eef4 64#include "socklib.h"
177e4f99
HS
65#include "sunrpc.h"
66
360d8738 67#define RPCDBG_FACILITY RPCDBG_SVCXPRT
1da177e4 68
b3cbf98e
CL
69/* To-do: to avoid tying up an nfsd thread while waiting for a
70 * handshake request, the request could instead be deferred.
71 */
72enum {
73 SVC_HANDSHAKE_TO = 5U * HZ
74};
1da177e4
LT
75
76static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *,
72c35376 77 int flags);
1da177e4
LT
78static int svc_udp_recvfrom(struct svc_rqst *);
79static int svc_udp_sendto(struct svc_rqst *);
755cceab 80static void svc_sock_detach(struct svc_xprt *);
69b6ba37 81static void svc_tcp_sock_detach(struct svc_xprt *);
755cceab 82static void svc_sock_free(struct svc_xprt *);
1da177e4 83
b700cbb1 84static struct svc_xprt *svc_create_socket(struct svc_serv *, int,
62832c03
PE
85 struct net *, struct sockaddr *,
86 int, int);
ed07536e
PZ
87#ifdef CONFIG_DEBUG_LOCK_ALLOC
88static struct lock_class_key svc_key[2];
89static struct lock_class_key svc_slock_key[2];
90
0f0257ea 91static void svc_reclassify_socket(struct socket *sock)
ed07536e
PZ
92{
93 struct sock *sk = sock->sk;
1b7a1819 94
fafc4e1e 95 if (WARN_ON_ONCE(!sock_allow_reclassification(sk)))
1b7a1819
WAA
96 return;
97
ed07536e
PZ
98 switch (sk->sk_family) {
99 case AF_INET:
100 sock_lock_init_class_and_name(sk, "slock-AF_INET-NFSD",
def13d74
TT
101 &svc_slock_key[0],
102 "sk_xprt.xpt_lock-AF_INET-NFSD",
103 &svc_key[0]);
ed07536e
PZ
104 break;
105
106 case AF_INET6:
107 sock_lock_init_class_and_name(sk, "slock-AF_INET6-NFSD",
def13d74
TT
108 &svc_slock_key[1],
109 "sk_xprt.xpt_lock-AF_INET6-NFSD",
110 &svc_key[1]);
ed07536e
PZ
111 break;
112
113 default:
114 BUG();
115 }
116}
117#else
0f0257ea 118static void svc_reclassify_socket(struct socket *sock)
ed07536e
PZ
119{
120}
121#endif
122
cca557a5 123/**
948f072a
N
124 * svc_tcp_release_ctxt - Release transport-related resources
125 * @xprt: the transport which owned the context
126 * @ctxt: the context from rqstp->rq_xprt_ctxt or dr->xprt_ctxt
cca557a5 127 *
1da177e4 128 */
948f072a 129static void svc_tcp_release_ctxt(struct svc_xprt *xprt, void *ctxt)
1da177e4 130{
1da177e4
LT
131}
132
cca557a5 133/**
948f072a
N
134 * svc_udp_release_ctxt - Release transport-related resources
135 * @xprt: the transport which owned the context
136 * @ctxt: the context from rqstp->rq_xprt_ctxt or dr->xprt_ctxt
cca557a5
CL
137 *
138 */
948f072a 139static void svc_udp_release_ctxt(struct svc_xprt *xprt, void *ctxt)
850cbadd 140{
948f072a 141 struct sk_buff *skb = ctxt;
850cbadd 142
948f072a 143 if (skb)
850cbadd 144 consume_skb(skb);
850cbadd
PA
145}
146
b92503b2
CL
147union svc_pktinfo_u {
148 struct in_pktinfo pkti;
b92503b2 149 struct in6_pktinfo pkti6;
b92503b2 150};
bc375ea7
DM
151#define SVC_PKTINFO_SPACE \
152 CMSG_SPACE(sizeof(union svc_pktinfo_u))
b92503b2
CL
153
154static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh)
155{
57b1d3ba
TT
156 struct svc_sock *svsk =
157 container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
158 switch (svsk->sk_sk->sk_family) {
b92503b2
CL
159 case AF_INET: {
160 struct in_pktinfo *pki = CMSG_DATA(cmh);
161
162 cmh->cmsg_level = SOL_IP;
163 cmh->cmsg_type = IP_PKTINFO;
164 pki->ipi_ifindex = 0;
849a1cf1
MJ
165 pki->ipi_spec_dst.s_addr =
166 svc_daddr_in(rqstp)->sin_addr.s_addr;
b92503b2
CL
167 cmh->cmsg_len = CMSG_LEN(sizeof(*pki));
168 }
169 break;
5a05ed73 170
b92503b2
CL
171 case AF_INET6: {
172 struct in6_pktinfo *pki = CMSG_DATA(cmh);
849a1cf1 173 struct sockaddr_in6 *daddr = svc_daddr_in6(rqstp);
b92503b2
CL
174
175 cmh->cmsg_level = SOL_IPV6;
176 cmh->cmsg_type = IPV6_PKTINFO;
849a1cf1 177 pki->ipi6_ifindex = daddr->sin6_scope_id;
4e3fd7a0 178 pki->ipi6_addr = daddr->sin6_addr;
b92503b2
CL
179 cmh->cmsg_len = CMSG_LEN(sizeof(*pki));
180 }
181 break;
b92503b2 182 }
b92503b2
CL
183}
184
03493bca
CL
185static int svc_sock_result_payload(struct svc_rqst *rqstp, unsigned int offset,
186 unsigned int length)
41205539
CL
187{
188 return 0;
189}
190
80212d59
N
191/*
192 * Report socket names for nfsdfs
193 */
e7942b9f 194static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining)
80212d59 195{
017cb47f
CL
196 const struct sock *sk = svsk->sk_sk;
197 const char *proto_name = sk->sk_protocol == IPPROTO_UDP ?
198 "udp" : "tcp";
80212d59
N
199 int len;
200
017cb47f 201 switch (sk->sk_family) {
e7942b9f
CL
202 case PF_INET:
203 len = snprintf(buf, remaining, "ipv4 %s %pI4 %d\n",
017cb47f 204 proto_name,
c720c7e8
ED
205 &inet_sk(sk)->inet_rcv_saddr,
206 inet_sk(sk)->inet_num);
80212d59 207 break;
c2bb06db 208#if IS_ENABLED(CONFIG_IPV6)
58de2f86
CL
209 case PF_INET6:
210 len = snprintf(buf, remaining, "ipv6 %s %pI6 %d\n",
017cb47f 211 proto_name,
efe4208f 212 &sk->sk_v6_rcv_saddr,
c720c7e8 213 inet_sk(sk)->inet_num);
80212d59 214 break;
c2bb06db 215#endif
80212d59 216 default:
e7942b9f 217 len = snprintf(buf, remaining, "*unknown-%d*\n",
017cb47f 218 sk->sk_family);
80212d59 219 }
e7942b9f
CL
220
221 if (len >= remaining) {
222 *buf = '\0';
223 return -ENAMETOOLONG;
80212d59
N
224 }
225 return len;
226}
227
5e052dda
CL
228static int
229svc_tcp_sock_process_cmsg(struct svc_sock *svsk, struct msghdr *msg,
230 struct cmsghdr *cmsg, int ret)
231{
232 if (cmsg->cmsg_level == SOL_TLS &&
233 cmsg->cmsg_type == TLS_GET_RECORD_TYPE) {
234 u8 content_type = *((u8 *)CMSG_DATA(cmsg));
235
236 switch (content_type) {
237 case TLS_RECORD_TYPE_DATA:
238 /* TLS sets EOR at the end of each application data
239 * record, even though there might be more frames
240 * waiting to be decrypted.
241 */
242 msg->msg_flags &= ~MSG_EOR;
243 break;
244 case TLS_RECORD_TYPE_ALERT:
245 ret = -ENOTCONN;
246 break;
247 default:
248 ret = -EAGAIN;
249 }
250 }
251 return ret;
252}
253
254static int
255svc_tcp_sock_recv_cmsg(struct svc_sock *svsk, struct msghdr *msg)
256{
257 union {
258 struct cmsghdr cmsg;
259 u8 buf[CMSG_SPACE(sizeof(u8))];
260 } u;
261 int ret;
262
263 msg->msg_control = &u;
264 msg->msg_controllen = sizeof(u);
265 ret = sock_recvmsg(svsk->sk_sock, msg, MSG_DONTWAIT);
266 if (unlikely(msg->msg_controllen != sizeof(u)))
267 ret = svc_tcp_sock_process_cmsg(svsk, msg, &u.cmsg, ret);
268 return ret;
269}
270
ca07eda3
CL
271#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
272static void svc_flush_bvec(const struct bio_vec *bvec, size_t size, size_t seek)
273{
274 struct bvec_iter bi = {
13a9a9d7 275 .bi_size = size + seek,
ca07eda3
CL
276 };
277 struct bio_vec bv;
278
279 bvec_iter_advance(bvec, &bi, seek & PAGE_MASK);
280 for_each_bvec(bv, bvec, bi, bi)
281 flush_dcache_page(bv.bv_page);
282}
283#else
284static inline void svc_flush_bvec(const struct bio_vec *bvec, size_t size,
285 size_t seek)
286{
287}
288#endif
289
1da177e4 290/*
ca07eda3
CL
291 * Read from @rqstp's transport socket. The incoming message fills whole
292 * pages in @rqstp's rq_pages array until the last page of the message
293 * has been received into a partial page.
1da177e4 294 */
ca07eda3
CL
295static ssize_t svc_tcp_read_msg(struct svc_rqst *rqstp, size_t buflen,
296 size_t seek)
1da177e4 297{
57b1d3ba
TT
298 struct svc_sock *svsk =
299 container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
ca07eda3 300 struct bio_vec *bvec = rqstp->rq_bvec;
4c8e5537 301 struct msghdr msg = { NULL };
ca07eda3 302 unsigned int i;
4c8e5537 303 ssize_t len;
ca07eda3 304 size_t t;
1da177e4 305
f8d1ff47 306 clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
ca07eda3 307
9088151f
CH
308 for (i = 0, t = 0; t < buflen; i++, t += PAGE_SIZE)
309 bvec_set_page(&bvec[i], rqstp->rq_pages[i], PAGE_SIZE, 0);
ca07eda3
CL
310 rqstp->rq_respages = &rqstp->rq_pages[i];
311 rqstp->rq_next_page = rqstp->rq_respages + 1;
312
de4eda9d 313 iov_iter_bvec(&msg.msg_iter, ITER_DEST, bvec, i, buflen);
ca07eda3
CL
314 if (seek) {
315 iov_iter_advance(&msg.msg_iter, seek);
316 buflen -= seek;
4c8e5537 317 }
5e052dda 318 len = svc_tcp_sock_recv_cmsg(svsk, &msg);
ca07eda3
CL
319 if (len > 0)
320 svc_flush_bvec(bvec, len, seek);
321
f8d1ff47
TM
322 /* If we read a full record, then assume there may be more
323 * data to read (stream based sockets only!)
324 */
325 if (len == buflen)
326 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
1da177e4 327
1da177e4
LT
328 return len;
329}
330
331/*
332 * Set socket snd and rcv buffer lengths
333 */
b7e5034c 334static void svc_sock_setbufsize(struct svc_sock *svsk, unsigned int nreqs)
1da177e4 335{
b7e5034c
BF
336 unsigned int max_mesg = svsk->sk_xprt.xpt_server->sv_max_mesg;
337 struct socket *sock = svsk->sk_sock;
338
339 nreqs = min(nreqs, INT_MAX / 2 / max_mesg);
340
1da177e4 341 lock_sock(sock->sk);
b7e5034c
BF
342 sock->sk->sk_sndbuf = nreqs * max_mesg * 2;
343 sock->sk->sk_rcvbuf = nreqs * max_mesg * 2;
47fcb03f 344 sock->sk->sk_write_space(sock->sk);
1da177e4 345 release_sock(sock->sk);
1da177e4 346}
16e4d93f 347
989f881e 348static void svc_sock_secure_port(struct svc_rqst *rqstp)
16e4d93f 349{
989f881e 350 if (svc_port_is_privileged(svc_addr(rqstp)))
7827c81f 351 set_bit(RQ_SECURE, &rqstp->rq_flags);
989f881e 352 else
7827c81f 353 clear_bit(RQ_SECURE, &rqstp->rq_flags);
16e4d93f
CL
354}
355
1da177e4
LT
356/*
357 * INET callback when data has been received on the socket.
358 */
fa9251af 359static void svc_data_ready(struct sock *sk)
1da177e4 360{
939bb7ef 361 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
1da177e4 362
40e0b090
PY
363 trace_sk_data_ready(sk);
364
939bb7ef 365 if (svsk) {
eebe53e8
VL
366 /* Refer to svc_setup_socket() for details. */
367 rmb();
fa9251af 368 svsk->sk_odata(sk);
998024de 369 trace_svcsock_data_ready(&svsk->sk_xprt, 0);
b3cbf98e
CL
370 if (test_bit(XPT_HANDSHAKE, &svsk->sk_xprt.xpt_flags))
371 return;
4720b070
TM
372 if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags))
373 svc_xprt_enqueue(&svsk->sk_xprt);
939bb7ef 374 }
1da177e4
LT
375}
376
377/*
378 * INET callback when space is newly available on the socket.
379 */
0f0257ea 380static void svc_write_space(struct sock *sk)
1da177e4
LT
381{
382 struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data);
383
384 if (svsk) {
eebe53e8
VL
385 /* Refer to svc_setup_socket() for details. */
386 rmb();
998024de 387 trace_svcsock_write_space(&svsk->sk_xprt, 0);
fa9251af 388 svsk->sk_owspace(sk);
f6150c3c 389 svc_xprt_enqueue(&svsk->sk_xprt);
1da177e4 390 }
1da177e4
LT
391}
392
c7fb3f06
TM
393static int svc_tcp_has_wspace(struct svc_xprt *xprt)
394{
637600f3 395 struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
c7fb3f06
TM
396
397 if (test_bit(XPT_LISTENER, &xprt->xpt_flags))
398 return 1;
637600f3 399 return !test_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
51877680
TM
400}
401
ea08e392
SM
402static void svc_tcp_kill_temp_xprt(struct svc_xprt *xprt)
403{
c433594c
CH
404 struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
405
406 sock_no_linger(svsk->sk_sock->sk);
ea08e392
SM
407}
408
b3cbf98e
CL
409/**
410 * svc_tcp_handshake_done - Handshake completion handler
411 * @data: address of xprt to wake
412 * @status: status of handshake
413 * @peerid: serial number of key containing the remote peer's identity
414 *
415 * If a security policy is specified as an export option, we don't
416 * have a specific export here to check. So we set a "TLS session
417 * is present" flag on the xprt and let an upper layer enforce local
418 * security policy.
419 */
420static void svc_tcp_handshake_done(void *data, int status, key_serial_t peerid)
421{
422 struct svc_xprt *xprt = data;
423 struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
424
425 if (!status) {
426 if (peerid != TLS_NO_PEERID)
427 set_bit(XPT_PEER_AUTH, &xprt->xpt_flags);
428 set_bit(XPT_TLS_SESSION, &xprt->xpt_flags);
429 }
430 clear_bit(XPT_HANDSHAKE, &xprt->xpt_flags);
431 complete_all(&svsk->sk_handshake_done);
432}
433
434/**
435 * svc_tcp_handshake - Perform a transport-layer security handshake
436 * @xprt: connected transport endpoint
437 *
438 */
439static void svc_tcp_handshake(struct svc_xprt *xprt)
440{
441 struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
442 struct sock *sk = svsk->sk_sock->sk;
443 struct tls_handshake_args args = {
444 .ta_sock = svsk->sk_sock,
445 .ta_done = svc_tcp_handshake_done,
446 .ta_data = xprt,
447 };
448 int ret;
449
450 trace_svc_tls_upcall(xprt);
451
452 clear_bit(XPT_TLS_SESSION, &xprt->xpt_flags);
453 init_completion(&svsk->sk_handshake_done);
454
455 ret = tls_server_hello_x509(&args, GFP_KERNEL);
456 if (ret) {
457 trace_svc_tls_not_started(xprt);
458 goto out_failed;
459 }
460
461 ret = wait_for_completion_interruptible_timeout(&svsk->sk_handshake_done,
462 SVC_HANDSHAKE_TO);
463 if (ret <= 0) {
464 if (tls_handshake_cancel(sk)) {
465 trace_svc_tls_timed_out(xprt);
466 goto out_close;
467 }
468 }
469
470 if (!test_bit(XPT_TLS_SESSION, &xprt->xpt_flags)) {
471 trace_svc_tls_unavailable(xprt);
472 goto out_close;
473 }
474
475 /* Mark the transport ready in case the remote sent RPC
476 * traffic before the kernel received the handshake
477 * completion downcall.
478 */
479 set_bit(XPT_DATA, &xprt->xpt_flags);
480 svc_xprt_enqueue(xprt);
481 return;
482
483out_close:
484 set_bit(XPT_CLOSE, &xprt->xpt_flags);
485out_failed:
486 clear_bit(XPT_HANDSHAKE, &xprt->xpt_flags);
487 set_bit(XPT_DATA, &xprt->xpt_flags);
488 svc_xprt_enqueue(xprt);
489}
490
7702ce40
CL
491/*
492 * See net/ipv6/ip_sockglue.c : ip_cmsg_recv_pktinfo
493 */
494static int svc_udp_get_dest_address4(struct svc_rqst *rqstp,
495 struct cmsghdr *cmh)
496{
497 struct in_pktinfo *pki = CMSG_DATA(cmh);
849a1cf1
MJ
498 struct sockaddr_in *daddr = svc_daddr_in(rqstp);
499
7702ce40
CL
500 if (cmh->cmsg_type != IP_PKTINFO)
501 return 0;
849a1cf1
MJ
502
503 daddr->sin_family = AF_INET;
504 daddr->sin_addr.s_addr = pki->ipi_spec_dst.s_addr;
7702ce40
CL
505 return 1;
506}
507
508/*
73df66f8 509 * See net/ipv6/datagram.c : ip6_datagram_recv_ctl
7702ce40
CL
510 */
511static int svc_udp_get_dest_address6(struct svc_rqst *rqstp,
512 struct cmsghdr *cmh)
513{
514 struct in6_pktinfo *pki = CMSG_DATA(cmh);
849a1cf1
MJ
515 struct sockaddr_in6 *daddr = svc_daddr_in6(rqstp);
516
7702ce40
CL
517 if (cmh->cmsg_type != IPV6_PKTINFO)
518 return 0;
849a1cf1
MJ
519
520 daddr->sin6_family = AF_INET6;
4e3fd7a0 521 daddr->sin6_addr = pki->ipi6_addr;
849a1cf1 522 daddr->sin6_scope_id = pki->ipi6_ifindex;
7702ce40
CL
523 return 1;
524}
525
9dbc240f
TT
526/*
527 * Copy the UDP datagram's destination address to the rqstp structure.
528 * The 'destination' address in this case is the address to which the
529 * peer sent the datagram, i.e. our local address. For multihomed
530 * hosts, this can change from msg to msg. Note that only the IP
531 * address changes, the port number should remain the same.
532 */
7702ce40
CL
533static int svc_udp_get_dest_address(struct svc_rqst *rqstp,
534 struct cmsghdr *cmh)
95756482 535{
7702ce40
CL
536 switch (cmh->cmsg_level) {
537 case SOL_IP:
538 return svc_udp_get_dest_address4(rqstp, cmh);
539 case SOL_IPV6:
540 return svc_udp_get_dest_address6(rqstp, cmh);
95756482 541 }
7702ce40
CL
542
543 return 0;
95756482
CL
544}
545
fff1ebb2
CL
546/**
547 * svc_udp_recvfrom - Receive a datagram from a UDP socket.
548 * @rqstp: request structure into which to receive an RPC Call
549 *
550 * Called in a loop when XPT_DATA has been set.
551 *
552 * Returns:
553 * On success, the number of bytes in a received RPC Call, or
554 * %0 if a complete RPC Call message was not ready to return
1da177e4 555 */
0f0257ea 556static int svc_udp_recvfrom(struct svc_rqst *rqstp)
1da177e4 557{
57b1d3ba
TT
558 struct svc_sock *svsk =
559 container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
bb5cf160 560 struct svc_serv *serv = svsk->sk_xprt.xpt_server;
1da177e4 561 struct sk_buff *skb;
bc375ea7
DM
562 union {
563 struct cmsghdr hdr;
564 long all[SVC_PKTINFO_SPACE / sizeof(long)];
565 } buffer;
566 struct cmsghdr *cmh = &buffer.hdr;
7a37f578
N
567 struct msghdr msg = {
568 .msg_name = svc_addr(rqstp),
569 .msg_control = cmh,
570 .msg_controllen = sizeof(buffer),
571 .msg_flags = MSG_DONTWAIT,
572 };
abc5c44d
CL
573 size_t len;
574 int err;
1da177e4 575
02fc6c36 576 if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags))
1da177e4
LT
577 /* udp sockets need large rcvbuf as all pending
578 * requests are still in that buffer. sndbuf must
579 * also be large enough that there is enough space
3262c816
GB
580 * for one reply per thread. We count all threads
581 * rather than threads in a particular pool, which
582 * provides an upper bound on the number of threads
583 * which will access the socket.
1da177e4 584 */
b7e5034c 585 svc_sock_setbufsize(svsk, serv->sv_nrthreads + 3);
1da177e4 586
02fc6c36 587 clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
05ed690e
N
588 err = kernel_recvmsg(svsk->sk_sock, &msg, NULL,
589 0, 0, MSG_PEEK | MSG_DONTWAIT);
fff1ebb2
CL
590 if (err < 0)
591 goto out_recv_err;
ec095263 592 skb = skb_recv_udp(svsk->sk_sk, MSG_DONTWAIT, &err);
fff1ebb2
CL
593 if (!skb)
594 goto out_recv_err;
595
9dbc240f 596 len = svc_addr_len(svc_addr(rqstp));
9dbc240f 597 rqstp->rq_addrlen = len;
2456e855 598 if (skb->tstamp == 0) {
b7aa0bf7 599 skb->tstamp = ktime_get_real();
cca5172a 600 /* Don't enable netstamp, sunrpc doesn't
1da177e4
LT
601 need that much accuracy */
602 }
3a0ed3e9 603 sock_write_timestamp(svsk->sk_sk, skb->tstamp);
02fc6c36 604 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* there may be more data... */
1da177e4 605
fff1ebb2 606 len = skb->len;
1da177e4 607 rqstp->rq_arg.len = len;
fff1ebb2 608 trace_svcsock_udp_recv(&svsk->sk_xprt, len);
1da177e4 609
95756482 610 rqstp->rq_prot = IPPROTO_UDP;
27459f09 611
fff1ebb2
CL
612 if (!svc_udp_get_dest_address(rqstp, cmh))
613 goto out_cmsg_err;
849a1cf1 614 rqstp->rq_daddrlen = svc_addr_len(svc_daddr(rqstp));
1da177e4
LT
615
616 if (skb_is_nonlinear(skb)) {
617 /* we have to copy */
618 local_bh_disable();
fff1ebb2
CL
619 if (csum_partial_copy_to_xdr(&rqstp->rq_arg, skb))
620 goto out_bh_enable;
1da177e4 621 local_bh_enable();
850cbadd 622 consume_skb(skb);
1da177e4
LT
623 } else {
624 /* we can use it in-place */
1da8c681 625 rqstp->rq_arg.head[0].iov_base = skb->data;
1da177e4 626 rqstp->rq_arg.head[0].iov_len = len;
22911fc5
ED
627 if (skb_checksum_complete(skb))
628 goto out_free;
5148bf4e 629 rqstp->rq_xprt_ctxt = skb;
1da177e4
LT
630 }
631
632 rqstp->rq_arg.page_base = 0;
633 if (len <= rqstp->rq_arg.head[0].iov_len) {
634 rqstp->rq_arg.head[0].iov_len = len;
635 rqstp->rq_arg.page_len = 0;
44524359 636 rqstp->rq_respages = rqstp->rq_pages+1;
1da177e4
LT
637 } else {
638 rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len;
44524359 639 rqstp->rq_respages = rqstp->rq_pages + 1 +
172589cc 640 DIV_ROUND_UP(rqstp->rq_arg.page_len, PAGE_SIZE);
1da177e4 641 }
afc59400 642 rqstp->rq_next_page = rqstp->rq_respages+1;
1da177e4
LT
643
644 if (serv->sv_stats)
645 serv->sv_stats->netudpcnt++;
646
319951eb 647 svc_sock_secure_port(rqstp);
82011c80 648 svc_xprt_received(rqstp->rq_xprt);
1da177e4 649 return len;
fff1ebb2
CL
650
651out_recv_err:
652 if (err != -EAGAIN) {
653 /* possibly an icmp error */
654 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
655 }
656 trace_svcsock_udp_recv_err(&svsk->sk_xprt, err);
82011c80 657 goto out_clear_busy;
fff1ebb2
CL
658out_cmsg_err:
659 net_warn_ratelimited("svc: received unknown control message %d/%d; dropping RPC reply datagram\n",
660 cmh->cmsg_level, cmh->cmsg_type);
661 goto out_free;
662out_bh_enable:
663 local_bh_enable();
f23abfdb 664out_free:
850cbadd 665 kfree_skb(skb);
82011c80
CL
666out_clear_busy:
667 svc_xprt_received(rqstp->rq_xprt);
f23abfdb 668 return 0;
1da177e4
LT
669}
670
da1661b9
CL
671/**
672 * svc_udp_sendto - Send out a reply on a UDP socket
673 * @rqstp: completed svc_rqst
674 *
ca4faf54
CL
675 * xpt_mutex ensures @rqstp's whole message is written to the socket
676 * without interruption.
677 *
da1661b9
CL
678 * Returns the number of bytes sent, or a negative errno.
679 */
680static int svc_udp_sendto(struct svc_rqst *rqstp)
1da177e4 681{
da1661b9
CL
682 struct svc_xprt *xprt = rqstp->rq_xprt;
683 struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
684 struct xdr_buf *xdr = &rqstp->rq_res;
685 union {
686 struct cmsghdr hdr;
687 long all[SVC_PKTINFO_SPACE / sizeof(long)];
688 } buffer;
689 struct cmsghdr *cmh = &buffer.hdr;
690 struct msghdr msg = {
691 .msg_name = &rqstp->rq_addr,
692 .msg_namelen = rqstp->rq_addrlen,
693 .msg_control = cmh,
694 .msg_controllen = sizeof(buffer),
695 };
3f649ab7 696 unsigned int sent;
da1661b9 697 int err;
1da177e4 698
948f072a
N
699 svc_udp_release_ctxt(xprt, rqstp->rq_xprt_ctxt);
700 rqstp->rq_xprt_ctxt = NULL;
23cf1ee1 701
da1661b9 702 svc_set_cmsg_data(rqstp, cmh);
1da177e4 703
ca4faf54
CL
704 mutex_lock(&xprt->xpt_mutex);
705
706 if (svc_xprt_is_dead(xprt))
707 goto out_notconn;
708
ff053dbb
TM
709 err = xdr_alloc_bvec(xdr, GFP_KERNEL);
710 if (err < 0)
711 goto out_unlock;
712
da1661b9 713 err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent);
da1661b9
CL
714 if (err == -ECONNREFUSED) {
715 /* ICMP error on earlier request. */
716 err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent);
da1661b9 717 }
ff053dbb 718 xdr_free_bvec(xdr);
998024de 719 trace_svcsock_udp_send(xprt, err);
ff053dbb 720out_unlock:
ca4faf54 721 mutex_unlock(&xprt->xpt_mutex);
da1661b9
CL
722 if (err < 0)
723 return err;
724 return sent;
ca4faf54
CL
725
726out_notconn:
727 mutex_unlock(&xprt->xpt_mutex);
728 return -ENOTCONN;
1da177e4
LT
729}
730
323bee32
TT
731static int svc_udp_has_wspace(struct svc_xprt *xprt)
732{
733 struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
bb5cf160 734 struct svc_serv *serv = xprt->xpt_server;
323bee32
TT
735 unsigned long required;
736
737 /*
738 * Set the SOCK_NOSPACE flag before checking the available
739 * sock space.
740 */
741 set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
7a90e8cc 742 required = atomic_read(&svsk->sk_xprt.xpt_reserved) + serv->sv_max_mesg;
323bee32
TT
743 if (required*2 > sock_wspace(svsk->sk_sk))
744 return 0;
745 clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
746 return 1;
747}
748
38a417cc
TT
749static struct svc_xprt *svc_udp_accept(struct svc_xprt *xprt)
750{
751 BUG();
752 return NULL;
753}
754
ea08e392
SM
755static void svc_udp_kill_temp_xprt(struct svc_xprt *xprt)
756{
757}
758
b700cbb1 759static struct svc_xprt *svc_udp_create(struct svc_serv *serv,
62832c03 760 struct net *net,
b700cbb1
TT
761 struct sockaddr *sa, int salen,
762 int flags)
763{
62832c03 764 return svc_create_socket(serv, IPPROTO_UDP, net, sa, salen, flags);
b700cbb1
TT
765}
766
2412e927 767static const struct svc_xprt_ops svc_udp_ops = {
b700cbb1 768 .xpo_create = svc_udp_create,
5d137990
TT
769 .xpo_recvfrom = svc_udp_recvfrom,
770 .xpo_sendto = svc_udp_sendto,
03493bca 771 .xpo_result_payload = svc_sock_result_payload,
948f072a 772 .xpo_release_ctxt = svc_udp_release_ctxt,
755cceab
TT
773 .xpo_detach = svc_sock_detach,
774 .xpo_free = svc_sock_free,
323bee32 775 .xpo_has_wspace = svc_udp_has_wspace,
38a417cc 776 .xpo_accept = svc_udp_accept,
ea08e392 777 .xpo_kill_temp_xprt = svc_udp_kill_temp_xprt,
360d8738
TT
778};
779
780static struct svc_xprt_class svc_udp_class = {
781 .xcl_name = "udp",
b700cbb1 782 .xcl_owner = THIS_MODULE,
360d8738 783 .xcl_ops = &svc_udp_ops,
49023155 784 .xcl_max_payload = RPCSVC_MAXPAYLOAD_UDP,
3c45ddf8 785 .xcl_ident = XPRT_TRANSPORT_UDP,
360d8738
TT
786};
787
bb5cf160 788static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
1da177e4 789{
bd4620dd
SK
790 svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_udp_class,
791 &svsk->sk_xprt, serv);
def13d74 792 clear_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
fa9251af 793 svsk->sk_sk->sk_data_ready = svc_data_ready;
1da177e4 794 svsk->sk_sk->sk_write_space = svc_write_space;
1da177e4
LT
795
796 /* initialise setting must have enough space to
cca5172a 797 * receive and respond to one request.
1da177e4
LT
798 * svc_udp_recvfrom will re-adjust if necessary
799 */
b7e5034c 800 svc_sock_setbufsize(svsk, 3);
1da177e4 801
0f0257ea
TT
802 /* data might have come in before data_ready set up */
803 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
02fc6c36 804 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
7a37f578 805
7a37f578 806 /* make sure we get destination address info */
7702ce40
CL
807 switch (svsk->sk_sk->sk_family) {
808 case AF_INET:
c1f9ec57 809 ip_sock_set_pktinfo(svsk->sk_sock->sk);
7d7207c2 810 break;
7702ce40 811 case AF_INET6:
7d7207c2 812 ip6_sock_set_recvpktinfo(svsk->sk_sock->sk);
7702ce40
CL
813 break;
814 default:
815 BUG();
816 }
1da177e4
LT
817}
818
819/*
820 * A data_ready event on a listening socket means there's a connection
821 * pending. Do not use state_change as a substitute for it.
822 */
676d2369 823static void svc_tcp_listen_data_ready(struct sock *sk)
1da177e4 824{
939bb7ef 825 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
1da177e4 826
40e0b090
PY
827 trace_sk_data_ready(sk);
828
eebe53e8
VL
829 if (svsk) {
830 /* Refer to svc_setup_socket() for details. */
831 rmb();
fa9251af 832 svsk->sk_odata(sk);
eebe53e8
VL
833 }
834
939bb7ef
NB
835 /*
836 * This callback may called twice when a new connection
837 * is established as a child socket inherits everything
838 * from a parent LISTEN socket.
839 * 1) data_ready method of the parent socket will be called
840 * when one of child sockets become ESTABLISHED.
841 * 2) data_ready method of the child socket may be called
842 * when it receives data before the socket is accepted.
843 * In case of 2, we should ignore it silently.
844 */
845 if (sk->sk_state == TCP_LISTEN) {
846 if (svsk) {
02fc6c36 847 set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
f6150c3c 848 svc_xprt_enqueue(&svsk->sk_xprt);
a0469f46 849 }
1da177e4 850 }
1da177e4
LT
851}
852
853/*
854 * A state change on a connected socket means it's dying or dead.
855 */
0f0257ea 856static void svc_tcp_state_change(struct sock *sk)
1da177e4 857{
939bb7ef 858 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
1da177e4 859
a0469f46 860 if (svsk) {
eebe53e8
VL
861 /* Refer to svc_setup_socket() for details. */
862 rmb();
fa9251af 863 svsk->sk_ostate(sk);
a0469f46 864 trace_svcsock_tcp_state(&svsk->sk_xprt, svsk->sk_sock);
e844d307
CL
865 if (sk->sk_state != TCP_ESTABLISHED)
866 svc_xprt_deferred_close(&svsk->sk_xprt);
1da177e4 867 }
1da177e4
LT
868}
869
870/*
871 * Accept a TCP connection
872 */
38a417cc 873static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
1da177e4 874{
38a417cc 875 struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
cdd88b9f 876 struct sockaddr_storage addr;
877 struct sockaddr *sin = (struct sockaddr *) &addr;
bb5cf160 878 struct svc_serv *serv = svsk->sk_xprt.xpt_server;
1da177e4
LT
879 struct socket *sock = svsk->sk_sock;
880 struct socket *newsock;
1da177e4
LT
881 struct svc_sock *newsvsk;
882 int err, slen;
883
1da177e4 884 if (!sock)
38a417cc 885 return NULL;
1da177e4 886
02fc6c36 887 clear_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
e6242e92
SS
888 err = kernel_accept(sock, &newsock, O_NONBLOCK);
889 if (err < 0) {
1da177e4
LT
890 if (err == -ENOMEM)
891 printk(KERN_WARNING "%s: no more sockets!\n",
892 serv->sv_name);
e87cc472
JP
893 else if (err != -EAGAIN)
894 net_warn_ratelimited("%s: accept failed (err %d)!\n",
895 serv->sv_name, -err);
a0469f46 896 trace_svcsock_accept_err(xprt, serv->sv_name, err);
38a417cc 897 return NULL;
1da177e4 898 }
319050d4
CL
899 if (IS_ERR(sock_alloc_file(newsock, O_NONBLOCK, NULL)))
900 return NULL;
901
02fc6c36 902 set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
1da177e4 903
9b2c45d4 904 err = kernel_getpeername(newsock, sin);
1da177e4 905 if (err < 0) {
a0469f46 906 trace_svcsock_getpeername_err(xprt, serv->sv_name, err);
1da177e4
LT
907 goto failed; /* aborted connection or whatever */
908 }
9b2c45d4 909 slen = err;
1da177e4 910
fa9251af
TM
911 /* Reset the inherited callbacks before calling svc_setup_socket */
912 newsock->sk->sk_state_change = svsk->sk_ostate;
913 newsock->sk->sk_data_ready = svsk->sk_odata;
914 newsock->sk->sk_write_space = svsk->sk_owspace;
915
1da177e4
LT
916 /* make sure that a write doesn't block forever when
917 * low on memory
918 */
919 newsock->sk->sk_sndtimeo = HZ*30;
920
72c35376
BF
921 newsvsk = svc_setup_socket(serv, newsock,
922 (SVC_SOCK_ANONYMOUS | SVC_SOCK_TEMPORARY));
923 if (IS_ERR(newsvsk))
1da177e4 924 goto failed;
9dbc240f 925 svc_xprt_set_remote(&newsvsk->sk_xprt, sin, slen);
9b2c45d4
DV
926 err = kernel_getsockname(newsock, sin);
927 slen = err;
a0469f46 928 if (unlikely(err < 0))
a9747692 929 slen = offsetof(struct sockaddr, sa_data);
9dbc240f 930 svc_xprt_set_local(&newsvsk->sk_xprt, sin, slen);
067d7817 931
ef11ce24
N
932 if (sock_is_loopback(newsock->sk))
933 set_bit(XPT_LOCAL, &newsvsk->sk_xprt.xpt_flags);
934 else
935 clear_bit(XPT_LOCAL, &newsvsk->sk_xprt.xpt_flags);
f9f3cc4f
TT
936 if (serv->sv_stats)
937 serv->sv_stats->nettcpconn++;
938
939 return &newsvsk->sk_xprt;
940
941failed:
319050d4 942 sockfd_put(newsock);
f9f3cc4f
TT
943 return NULL;
944}
945
ca07eda3
CL
946static size_t svc_tcp_restore_pages(struct svc_sock *svsk,
947 struct svc_rqst *rqstp)
31d68ef6 948{
ca07eda3
CL
949 size_t len = svsk->sk_datalen;
950 unsigned int i, npages;
31d68ef6 951
ca07eda3 952 if (!len)
31d68ef6 953 return 0;
31d68ef6
BF
954 npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
955 for (i = 0; i < npages; i++) {
956 if (rqstp->rq_pages[i] != NULL)
957 put_page(rqstp->rq_pages[i]);
958 BUG_ON(svsk->sk_pages[i] == NULL);
959 rqstp->rq_pages[i] = svsk->sk_pages[i];
960 svsk->sk_pages[i] = NULL;
961 }
962 rqstp->rq_arg.head[0].iov_base = page_address(rqstp->rq_pages[0]);
963 return len;
964}
965
966static void svc_tcp_save_pages(struct svc_sock *svsk, struct svc_rqst *rqstp)
967{
968 unsigned int i, len, npages;
969
8af345f5 970 if (svsk->sk_datalen == 0)
31d68ef6 971 return;
8af345f5 972 len = svsk->sk_datalen;
31d68ef6
BF
973 npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
974 for (i = 0; i < npages; i++) {
975 svsk->sk_pages[i] = rqstp->rq_pages[i];
976 rqstp->rq_pages[i] = NULL;
977 }
978}
979
980static void svc_tcp_clear_pages(struct svc_sock *svsk)
981{
982 unsigned int i, len, npages;
983
8af345f5 984 if (svsk->sk_datalen == 0)
31d68ef6 985 goto out;
8af345f5 986 len = svsk->sk_datalen;
31d68ef6
BF
987 npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
988 for (i = 0; i < npages; i++) {
cf3aa02c
BF
989 if (svsk->sk_pages[i] == NULL) {
990 WARN_ON_ONCE(1);
991 continue;
992 }
31d68ef6
BF
993 put_page(svsk->sk_pages[i]);
994 svsk->sk_pages[i] = NULL;
995 }
996out:
997 svsk->sk_tcplen = 0;
8af345f5 998 svsk->sk_datalen = 0;
31d68ef6
BF
999}
1000
/*
 * Receive fragment record header into sk_marker.
 *
 * Accumulates the 4-byte RPC-over-TCP record marker across calls in
 * svsk->sk_tcplen / svsk->sk_marker, so a short read simply resumes
 * on the next invocation.
 *
 * Returns the length of the current record fragment once the full
 * marker has been read, -EAGAIN if more marker bytes are still needed,
 * or a negative errno from the underlying receive.
 */
static ssize_t svc_tcp_read_marker(struct svc_sock *svsk,
				   struct svc_rqst *rqstp)
{
	ssize_t want, len;

	/* If we haven't gotten the record length yet,
	 * get the next four bytes.
	 */
	if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) {
		struct msghdr msg = { NULL };
		struct kvec iov;

		want = sizeof(rpc_fraghdr) - svsk->sk_tcplen;
		iov.iov_base = ((char *)&svsk->sk_marker) + svsk->sk_tcplen;
		iov.iov_len = want;
		iov_iter_kvec(&msg.msg_iter, ITER_DEST, &iov, 1, want);
		len = svc_tcp_sock_recv_cmsg(svsk, &msg);
		if (len < 0)
			return len;
		svsk->sk_tcplen += len;
		if (len < want) {
			/* call again to read the remaining bytes */
			goto err_short;
		}
		trace_svcsock_marker(&svsk->sk_xprt, svsk->sk_marker);
		/* Reject records that would exceed the server's maximum
		 * message size; the connection is unusable after that.
		 */
		if (svc_sock_reclen(svsk) + svsk->sk_datalen >
		    svsk->sk_xprt.xpt_server->sv_max_mesg)
			goto err_too_large;
	}
	return svc_sock_reclen(svsk);

err_too_large:
	net_notice_ratelimited("svc: %s %s RPC fragment too large: %d\n",
			       __func__, svsk->sk_xprt.xpt_server->sv_name,
			       svc_sock_reclen(svsk));
	svc_xprt_deferred_close(&svsk->sk_xprt);
err_short:
	return -EAGAIN;
}
1043
/*
 * Handle a received TCP record that is actually an RPC Reply (the
 * call-direction word is non-zero): match it against a pending
 * backchannel request on the paired client transport (xpt_bc_xprt)
 * and complete that request.
 *
 * Returns 0 if the reply was consumed, or -EAGAIN if there is no
 * backchannel transport or no matching request.
 */
static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp)
{
	struct rpc_xprt *bc_xprt = svsk->sk_xprt.xpt_bc_xprt;
	struct rpc_rqst *req = NULL;
	struct kvec *src, *dst;
	__be32 *p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
	__be32 xid;
	__be32 calldir;

	xid = *p++;
	calldir = *p;

	if (!bc_xprt)
		return -EAGAIN;
	/* queue_lock protects the backchannel request list during lookup
	 * and completion.
	 */
	spin_lock(&bc_xprt->queue_lock);
	req = xprt_lookup_rqst(bc_xprt, xid);
	if (!req)
		goto unlock_notfound;

	memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf));
	/*
	 * XXX!: cheating for now! Only copying HEAD.
	 * But we know this is good enough for now (in fact, for any
	 * callback reply in the foreseeable future).
	 */
	dst = &req->rq_private_buf.head[0];
	src = &rqstp->rq_arg.head[0];
	if (dst->iov_len < src->iov_len)
		goto unlock_eagain; /* whatever; just giving up. */
	memcpy(dst->iov_base, src->iov_base, src->iov_len);
	xprt_complete_rqst(req->rq_task, rqstp->rq_arg.len);
	rqstp->rq_arg.len = 0;
	spin_unlock(&bc_xprt->queue_lock);
	return 0;
unlock_notfound:
	printk(KERN_NOTICE
		"%s: Got unrecognized reply: "
		"calldir 0x%x xpt_bc_xprt %p xid %08x\n",
		__func__, ntohl(calldir),
		bc_xprt, ntohl(xid));
unlock_eagain:
	spin_unlock(&bc_xprt->queue_lock);
	return -EAGAIN;
}
1088
836fbadb
BF
1089static void svc_tcp_fragment_received(struct svc_sock *svsk)
1090{
1091 /* If we have more data, signal svc_xprt_enqueue() to try again */
836fbadb 1092 svsk->sk_tcplen = 0;
02648908 1093 svsk->sk_marker = xdr_zero;
836fbadb 1094}
31d68ef6 1095
ca07eda3
CL
/**
 * svc_tcp_recvfrom - Receive data from a TCP socket
 * @rqstp: request structure into which to receive an RPC Call
 *
 * Called in a loop when XPT_DATA has been set.
 *
 * Read the 4-byte stream record marker, then use the record length
 * in that marker to set up exactly the resources needed to receive
 * the next RPC message into @rqstp.
 *
 * Returns:
 *   On success, the number of bytes in a received RPC Call, or
 *   %0 if a complete RPC Call message was not ready to return
 *
 * The zero return case handles partial receives and callback Replies.
 * The state of a partial receive is preserved in the svc_sock for
 * the next call to svc_tcp_recvfrom.
 */
static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
{
	struct svc_sock *svsk =
		container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
	struct svc_serv *serv = svsk->sk_xprt.xpt_server;
	size_t want, base;
	ssize_t len;
	__be32 *p;
	__be32 calldir;

	clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
	len = svc_tcp_read_marker(svsk, rqstp);
	if (len < 0)
		goto error;

	/* Pick up any pages saved by an earlier partial receive, then
	 * read only the bytes still missing from this fragment.
	 */
	base = svc_tcp_restore_pages(svsk, rqstp);
	want = len - (svsk->sk_tcplen - sizeof(rpc_fraghdr));
	len = svc_tcp_read_msg(rqstp, base + want, base);
	if (len >= 0) {
		trace_svcsock_tcp_recv(&svsk->sk_xprt, len);
		svsk->sk_tcplen += len;
		svsk->sk_datalen += len;
	}
	if (len != want || !svc_sock_final_rec(svsk))
		goto err_incomplete;
	/* Too short to hold even an xid and call-direction word */
	if (svsk->sk_datalen < 8)
		goto err_nuts;

	rqstp->rq_arg.len = svsk->sk_datalen;
	rqstp->rq_arg.page_base = 0;
	if (rqstp->rq_arg.len <= rqstp->rq_arg.head[0].iov_len) {
		rqstp->rq_arg.head[0].iov_len = rqstp->rq_arg.len;
		rqstp->rq_arg.page_len = 0;
	} else
		rqstp->rq_arg.page_len = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len;

	rqstp->rq_xprt_ctxt = NULL;
	rqstp->rq_prot = IPPROTO_TCP;
	if (test_bit(XPT_LOCAL, &svsk->sk_xprt.xpt_flags))
		set_bit(RQ_LOCAL, &rqstp->rq_flags);
	else
		clear_bit(RQ_LOCAL, &rqstp->rq_flags);

	/* A non-zero call direction marks a backchannel Reply rather
	 * than a forward-channel Call.
	 */
	p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
	calldir = p[1];
	if (calldir)
		len = receive_cb_reply(svsk, rqstp);

	/* Reset TCP read info */
	svsk->sk_datalen = 0;
	svc_tcp_fragment_received(svsk);

	if (len < 0)
		goto error;

	svc_xprt_copy_addrs(rqstp, &svsk->sk_xprt);
	if (serv->sv_stats)
		serv->sv_stats->nettcpcnt++;

	svc_sock_secure_port(rqstp);
	svc_xprt_received(rqstp->rq_xprt);
	return rqstp->rq_arg.len;

err_incomplete:
	/* Park the pages received so far so the next call can resume */
	svc_tcp_save_pages(svsk, rqstp);
	if (len < 0 && len != -EAGAIN)
		goto err_delete;
	if (len == want)
		svc_tcp_fragment_received(svsk);
	else
		trace_svcsock_tcp_recv_short(&svsk->sk_xprt,
				svc_sock_reclen(svsk),
				svsk->sk_tcplen - sizeof(rpc_fraghdr));
	goto err_noclose;
error:
	if (len != -EAGAIN)
		goto err_delete;
	trace_svcsock_tcp_recv_eagain(&svsk->sk_xprt, 0);
	goto err_noclose;
err_nuts:
	svsk->sk_datalen = 0;
err_delete:
	trace_svcsock_tcp_recv_err(&svsk->sk_xprt, len);
	svc_xprt_deferred_close(&svsk->sk_xprt);
err_noclose:
	svc_xprt_received(rqstp->rq_xprt);
	return 0;	/* record not complete */
}
1202
4a85a6a3
CL
1203static int svc_tcp_send_kvec(struct socket *sock, const struct kvec *vec,
1204 int flags)
1205{
1206 return kernel_sendpage(sock, virt_to_page(vec->iov_base),
1207 offset_in_page(vec->iov_base),
1208 vec->iov_len, flags);
1209}
1210
/*
 * kernel_sendpage() is used exclusively to reduce the number of
 * copy operations in this path. Therefore the caller must ensure
 * that the pages backing @xdr are unchanging.
 *
 * In addition, the logic assumes that .bv_len is never larger
 * than PAGE_SIZE.
 *
 * Returns 0 on success or partial send (the caller must compare
 * *sentp against the expected total), or a negative errno on error.
 */
static int svc_tcp_sendmsg(struct socket *sock, struct xdr_buf *xdr,
			   rpc_fraghdr marker, unsigned int *sentp)
{
	const struct kvec *head = xdr->head;
	const struct kvec *tail = xdr->tail;
	struct kvec rm = {
		.iov_base	= &marker,
		.iov_len	= sizeof(marker),
	};
	struct msghdr msg = {
		.msg_flags	= 0,
	};
	int ret;

	*sentp = 0;
	ret = xdr_alloc_bvec(xdr, GFP_KERNEL);
	if (ret < 0)
		return ret;

	/* The 4-byte stream record marker goes out first */
	ret = kernel_sendmsg(sock, &msg, &rm, 1, rm.iov_len);
	if (ret < 0)
		return ret;
	*sentp += ret;
	if (ret != rm.iov_len)
		return -EAGAIN;

	ret = svc_tcp_send_kvec(sock, head, 0);
	if (ret < 0)
		return ret;
	*sentp += ret;
	if (ret != head->iov_len)
		goto out;

	if (xdr->page_len) {
		unsigned int offset, len, remaining;
		struct bio_vec *bvec;

		/* Skip whole pages consumed by page_base */
		bvec = xdr->bvec + (xdr->page_base >> PAGE_SHIFT);
		offset = offset_in_page(xdr->page_base);
		remaining = xdr->page_len;
		while (remaining > 0) {
			len = min(remaining, bvec->bv_len - offset);
			ret = kernel_sendpage(sock, bvec->bv_page,
					      bvec->bv_offset + offset,
					      len, 0);
			if (ret < 0)
				return ret;
			*sentp += ret;
			/* Short send: report progress via *sentp */
			if (ret != len)
				goto out;
			remaining -= len;
			offset = 0;
			bvec++;
		}
	}

	if (tail->iov_len) {
		ret = svc_tcp_send_kvec(sock, tail, 0);
		if (ret < 0)
			return ret;
		*sentp += ret;
	}

out:
	return 0;
}
1285
da1661b9
CL
/**
 * svc_tcp_sendto - Send out a reply on a TCP socket
 * @rqstp: completed svc_rqst
 *
 * xpt_mutex ensures @rqstp's whole message is written to the socket
 * without interruption.
 *
 * Returns the number of bytes sent, or a negative errno.
 */
static int svc_tcp_sendto(struct svc_rqst *rqstp)
{
	struct svc_xprt *xprt = rqstp->rq_xprt;
	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
	struct xdr_buf *xdr = &rqstp->rq_res;
	/* Record marker: length of the reply with the last-fragment bit */
	rpc_fraghdr marker = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT |
					 (u32)xdr->len);
	unsigned int sent;
	int err;

	svc_tcp_release_ctxt(xprt, rqstp->rq_xprt_ctxt);
	rqstp->rq_xprt_ctxt = NULL;

	/* sk_sendqlen counts in-flight senders: the socket stays corked
	 * until the last sender finishes.
	 */
	atomic_inc(&svsk->sk_sendqlen);
	mutex_lock(&xprt->xpt_mutex);
	if (svc_xprt_is_dead(xprt))
		goto out_notconn;
	tcp_sock_set_cork(svsk->sk_sk, true);
	err = svc_tcp_sendmsg(svsk->sk_sock, xdr, marker, &sent);
	xdr_free_bvec(xdr);
	trace_svcsock_tcp_send(xprt, err < 0 ? (long)err : sent);
	if (err < 0 || sent != (xdr->len + sizeof(marker)))
		goto out_close;
	if (atomic_dec_and_test(&svsk->sk_sendqlen))
		tcp_sock_set_cork(svsk->sk_sk, false);
	mutex_unlock(&xprt->xpt_mutex);
	return sent;

out_notconn:
	atomic_dec(&svsk->sk_sendqlen);
	mutex_unlock(&xprt->xpt_mutex);
	return -ENOTCONN;
out_close:
	pr_notice("rpc-srv/tcp: %s: %s %d when sending %d bytes - shutting down socket\n",
		  xprt->xpt_server->sv_name,
		  (err < 0) ? "got error" : "sent",
		  (err < 0) ? err : sent, xdr->len);
	/* A partial or failed send leaves the byte stream out of sync
	 * with the record marking, so the connection must be closed.
	 */
	svc_xprt_deferred_close(xprt);
	atomic_dec(&svsk->sk_sendqlen);
	mutex_unlock(&xprt->xpt_mutex);
	return -EAGAIN;
}
1337
/* svc_xprt_ops::xpo_create method: create a TCP listener socket
 * for @serv bound to address @sa.
 */
static struct svc_xprt *svc_tcp_create(struct svc_serv *serv,
				       struct net *net,
				       struct sockaddr *sa, int salen,
				       int flags)
{
	return svc_create_socket(serv, IPPROTO_TCP, net, sa, salen, flags);
}
1345
/* Method table wiring the generic svc transport switch to the TCP
 * implementations in this file.
 */
static const struct svc_xprt_ops svc_tcp_ops = {
	.xpo_create = svc_tcp_create,
	.xpo_recvfrom = svc_tcp_recvfrom,
	.xpo_sendto = svc_tcp_sendto,
	.xpo_result_payload = svc_sock_result_payload,
	.xpo_release_ctxt = svc_tcp_release_ctxt,
	.xpo_detach = svc_tcp_sock_detach,
	.xpo_free = svc_sock_free,
	.xpo_has_wspace = svc_tcp_has_wspace,
	.xpo_accept = svc_tcp_accept,
	.xpo_kill_temp_xprt = svc_tcp_kill_temp_xprt,
	.xpo_handshake = svc_tcp_handshake,
};
1359
/* Transport class registered under the name "tcp" */
static struct svc_xprt_class svc_tcp_class = {
	.xcl_name = "tcp",
	.xcl_owner = THIS_MODULE,
	.xcl_ops = &svc_tcp_ops,
	.xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
	.xcl_ident = XPRT_TRANSPORT_TCP,
};
1367
/* Register the socket-based (TCP and UDP) transport classes with the
 * generic svc transport switch.
 */
void svc_init_xprt_sock(void)
{
	svc_reg_xprt_class(&svc_tcp_class);
	svc_reg_xprt_class(&svc_udp_class);
}
1373
/* Unregister the socket-based transport classes; reverses
 * svc_init_xprt_sock().
 */
void svc_cleanup_xprt_sock(void)
{
	svc_unreg_xprt_class(&svc_tcp_class);
	svc_unreg_xprt_class(&svc_udp_class);
}
1379
/*
 * Initialize a freshly-created svc_sock for TCP: attach it to the
 * generic transport layer, install the svc callbacks on the underlying
 * struct sock, and set up either listener state or per-connection
 * record-parsing state.
 */
static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
{
	struct sock *sk = svsk->sk_sk;

	svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_tcp_class,
		      &svsk->sk_xprt, serv);
	set_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
	set_bit(XPT_CONG_CTRL, &svsk->sk_xprt.xpt_flags);
	if (sk->sk_state == TCP_LISTEN) {
		strcpy(svsk->sk_xprt.xpt_remotebuf, "listener");
		set_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags);
		sk->sk_data_ready = svc_tcp_listen_data_ready;
		set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
	} else {
		sk->sk_state_change = svc_tcp_state_change;
		sk->sk_data_ready = svc_data_ready;
		sk->sk_write_space = svc_write_space;

		/* Reset the stream record-marker parsing state */
		svsk->sk_marker = xdr_zero;
		svsk->sk_tcplen = 0;
		svsk->sk_datalen = 0;
		memset(&svsk->sk_pages[0], 0, sizeof(svsk->sk_pages));

		tcp_sock_set_nodelay(sk);

		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
		switch (sk->sk_state) {
		case TCP_SYN_RECV:
		case TCP_ESTABLISHED:
			break;
		default:
			/* Connection already closing; schedule teardown */
			svc_xprt_deferred_close(&svsk->sk_xprt);
		}
	}
}
1415
void svc_sock_update_bufs(struct svc_serv *serv)
{
	/*
	 * The number of server threads has changed. Update
	 * rcvbuf and sndbuf accordingly on all sockets
	 */
	struct svc_sock *svsk;

	spin_lock_bh(&serv->sv_lock);
	/* Only flag each permanent socket here; the actual buffer
	 * resize is presumably done by whichever path tests
	 * XPT_CHNGBUF — confirm against the flag's consumers.
	 */
	list_for_each_entry(svsk, &serv->sv_permsocks, sk_xprt.xpt_list)
		set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
	spin_unlock_bh(&serv->sv_lock);
}
EXPORT_SYMBOL_GPL(svc_sock_update_bufs);
1da177e4 1430
1da177e4
LT
/*
 * Initialize socket for RPC use and create svc_sock struct
 *
 * Allocates and wires up a svc_sock around @sock, optionally
 * registering the service with rpcbind, and dispatches to the
 * UDP or TCP initializer based on the socket type.
 *
 * Returns the new svc_sock or an ERR_PTR on failure; on failure
 * no reference to @sock is retained.
 */
static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
					 struct socket *sock,
					 int flags)
{
	struct svc_sock *svsk;
	struct sock *inet;
	int pmap_register = !(flags & SVC_SOCK_ANONYMOUS);

	svsk = kzalloc(sizeof(*svsk), GFP_KERNEL);
	if (!svsk)
		return ERR_PTR(-ENOMEM);

	inet = sock->sk;

	/* Register with rpcbind unless the caller asked for an
	 * anonymous (unregistered) socket.
	 */
	if (pmap_register) {
		int err;

		err = svc_register(serv, sock_net(sock->sk), inet->sk_family,
				   inet->sk_protocol,
				   ntohs(inet_sk(inet)->inet_sport));
		if (err < 0) {
			kfree(svsk);
			return ERR_PTR(err);
		}
	}

	/* Save the original socket callbacks so they can be restored
	 * when the svc_sock is detached.
	 */
	svsk->sk_sock = sock;
	svsk->sk_sk = inet;
	svsk->sk_ostate = inet->sk_state_change;
	svsk->sk_odata = inet->sk_data_ready;
	svsk->sk_owspace = inet->sk_write_space;
	/*
	 * This barrier is necessary in order to prevent race condition
	 * with svc_data_ready(), svc_listen_data_ready() and others
	 * when calling callbacks above.
	 */
	wmb();
	inet->sk_user_data = svsk;

	/* Initialize the socket */
	if (sock->type == SOCK_DGRAM)
		svc_udp_init(svsk, serv);
	else
		svc_tcp_init(svsk, serv);

	trace_svcsock_new_socket(sock);
	return svsk;
}
1482
30646394
SK
1483bool svc_alien_sock(struct net *net, int fd)
1484{
1485 int err;
1486 struct socket *sock = sockfd_lookup(fd, &err);
1487 bool ret = false;
1488
1489 if (!sock)
1490 goto out;
1491 if (sock_net(sock->sk) != net)
1492 ret = true;
1493 sockfd_put(sock);
1494out:
1495 return ret;
1496}
1497EXPORT_SYMBOL_GPL(svc_alien_sock);
1498
bfba9ab4
CL
/**
 * svc_addsock - add a listener socket to an RPC service
 * @serv: pointer to RPC service to which to add a new listener
 * @fd: file descriptor of the new listener
 * @name_return: pointer to buffer to fill in with name of listener
 * @len: size of the buffer
 * @cred: credential
 *
 * Fills in socket name and returns positive length of name if successful.
 * Name is terminated with '\n'. On error, returns a negative errno
 * value.
 */
int svc_addsock(struct svc_serv *serv, const int fd, char *name_return,
		const size_t len, const struct cred *cred)
{
	int err = 0;
	struct socket *so = sockfd_lookup(fd, &err);
	struct svc_sock *svsk = NULL;
	struct sockaddr_storage addr;
	struct sockaddr *sin = (struct sockaddr *)&addr;
	int salen;

	if (!so)
		return err;
	/* Only unconnected IPv4/IPv6 TCP or UDP sockets are acceptable */
	err = -EAFNOSUPPORT;
	if ((so->sk->sk_family != PF_INET) && (so->sk->sk_family != PF_INET6))
		goto out;
	err = -EPROTONOSUPPORT;
	if (so->sk->sk_protocol != IPPROTO_TCP &&
	    so->sk->sk_protocol != IPPROTO_UDP)
		goto out;
	err = -EISCONN;
	if (so->state > SS_UNCONNECTED)
		goto out;
	err = -ENOENT;
	if (!try_module_get(THIS_MODULE))
		goto out;
	svsk = svc_setup_socket(serv, so, SVC_SOCK_DEFAULTS);
	if (IS_ERR(svsk)) {
		module_put(THIS_MODULE);
		err = PTR_ERR(svsk);
		goto out;
	}
	/* Record the local address for presentation purposes; a
	 * getsockname failure here is not fatal.
	 */
	salen = kernel_getsockname(svsk->sk_sock, sin);
	if (salen >= 0)
		svc_xprt_set_local(&svsk->sk_xprt, sin, salen);
	svsk->sk_xprt.xpt_cred = get_cred(cred);
	svc_add_new_perm_xprt(serv, &svsk->sk_xprt);
	return svc_one_sock_name(svsk, name_return, len);
out:
	sockfd_put(so);
	return err;
}
EXPORT_SYMBOL_GPL(svc_addsock);
1553
1da177e4
LT
/*
 * Create socket for RPC service.
 *
 * Creates, binds, and (for TCP) starts listening on a new kernel
 * socket, then wraps it in a svc_sock via svc_setup_socket().
 * Only IPPROTO_UDP and IPPROTO_TCP over AF_INET/AF_INET6 are
 * supported. Returns the new transport or an ERR_PTR.
 */
static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
					  int protocol,
					  struct net *net,
					  struct sockaddr *sin, int len,
					  int flags)
{
	struct svc_sock *svsk;
	struct socket *sock;
	int error;
	int type;
	struct sockaddr_storage addr;
	struct sockaddr *newsin = (struct sockaddr *)&addr;
	int newlen;
	int family;

	if (protocol != IPPROTO_UDP && protocol != IPPROTO_TCP) {
		printk(KERN_WARNING "svc: only UDP and TCP "
				"sockets supported\n");
		return ERR_PTR(-EINVAL);
	}

	type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM;
	switch (sin->sa_family) {
	case AF_INET6:
		family = PF_INET6;
		break;
	case AF_INET:
		family = PF_INET;
		break;
	default:
		return ERR_PTR(-EINVAL);
	}

	error = __sock_create(net, family, type, protocol, &sock, 1);
	if (error < 0)
		return ERR_PTR(error);

	svc_reclassify_socket(sock);

	/*
	 * If this is an PF_INET6 listener, we want to avoid
	 * getting requests from IPv4 remotes. Those should
	 * be shunted to a PF_INET listener via rpcbind.
	 */
	if (family == PF_INET6)
		ip6_sock_set_v6only(sock->sk);
	if (type == SOCK_STREAM)
		sock->sk->sk_reuse = SK_CAN_REUSE; /* allow address reuse */
	error = kernel_bind(sock, sin, len);
	if (error < 0)
		goto bummer;

	/* Read back the address actually bound (the requested port
	 * may have been 0, letting the kernel pick one).
	 */
	error = kernel_getsockname(sock, newsin);
	if (error < 0)
		goto bummer;
	newlen = error;

	if (protocol == IPPROTO_TCP) {
		if ((error = kernel_listen(sock, 64)) < 0)
			goto bummer;
	}

	svsk = svc_setup_socket(serv, sock, flags);
	if (IS_ERR(svsk)) {
		error = PTR_ERR(svsk);
		goto bummer;
	}
	svc_xprt_set_local(&svsk->sk_xprt, newsin, newlen);
	return (struct svc_xprt *)svsk;
bummer:
	sock_release(sock);
	return ERR_PTR(error);
}
1630
755cceab
TT
/*
 * Detach the svc_sock from the socket so that no
 * more callbacks occur.
 */
static void svc_sock_detach(struct svc_xprt *xprt)
{
	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
	struct sock *sk = svsk->sk_sk;

	/* put back the old socket callbacks; done under the socket lock
	 * so a callback cannot run concurrently with the swap
	 */
	lock_sock(sk);
	sk->sk_state_change = svsk->sk_ostate;
	sk->sk_data_ready = svsk->sk_odata;
	sk->sk_write_space = svsk->sk_owspace;
	sk->sk_user_data = NULL;
	release_sock(sk);
}
1648
1649/*
1650 * Disconnect the socket, and reset the callbacks
1651 */
1652static void svc_tcp_sock_detach(struct svc_xprt *xprt)
1653{
1654 struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
1655
69b6ba37
TM
1656 svc_sock_detach(xprt);
1657
31d68ef6
BF
1658 if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) {
1659 svc_tcp_clear_pages(svsk);
69b6ba37 1660 kernel_sock_shutdown(svsk->sk_sock, SHUT_RDWR);
31d68ef6 1661 }
755cceab
TT
1662}
1663
1664/*
1665 * Free the svc_sock's socket resources and the svc_sock itself.
1666 */
1667static void svc_sock_free(struct svc_xprt *xprt)
1668{
1669 struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
b3cbf98e 1670 struct socket *sock = svsk->sk_sock;
755cceab 1671
b3cbf98e
CL
1672 tls_handshake_cancel(sock->sk);
1673 if (sock->file)
1674 sockfd_put(sock);
755cceab 1675 else
b3cbf98e 1676 sock_release(sock);
755cceab
TT
1677 kfree(svsk);
1678}