splice: update to new vmsplice-to-user interface
[fio.git] / engines / net.c
CommitLineData
ed92ac0c 1/*
da751ca9
JA
2 * net engine
3 *
4 * IO engine that reads/writes to/from sockets.
5 *
ed92ac0c
JA
6 */
7#include <stdio.h>
8#include <stdlib.h>
9#include <unistd.h>
10#include <errno.h>
11#include <assert.h>
12#include <netinet/in.h>
13#include <arpa/inet.h>
14#include <netdb.h>
5fdd124a 15#include <sys/poll.h>
ed92ac0c
JA
16
17#include "../fio.h"
ed92ac0c 18
b5af8293
JA
/*
 * Private per-thread state of the net engine, stored in td->io_ops->data.
 */
struct netio_data {
	/* listening socket used on the receiving side; -1 while unset */
	int listenfd;
	/* non-zero when the job transmits, zero when it receives */
	int send_to_net;
	/* non-zero when the splice()/vmsplice() transfer path is selected */
	int use_splice;
	/* pipe used as staging area by the splice path; -1 while unset */
	int pipes[2];
	/* NOTE(review): not referenced by the code visible in this file */
	char host[64];
	/* address passed to connect(), bind() and accept() */
	struct sockaddr_in addr;
};
ed92ac0c
JA
27
28static int fio_netio_prep(struct thread_data *td, struct io_u *io_u)
29{
b5af8293 30 struct netio_data *nd = td->io_ops->data;
ed92ac0c
JA
31 struct fio_file *f = io_u->file;
32
7a6499da
JA
33 /*
34 * Make sure we don't see spurious reads to a receiver, and vice versa
35 */
b5af8293
JA
36 if ((nd->send_to_net && io_u->ddir == DDIR_READ) ||
37 (!nd->send_to_net && io_u->ddir == DDIR_WRITE)) {
e1161c32 38 td_verror(td, EINVAL, "bad direction");
7a6499da 39 return 1;
ed92ac0c 40 }
7a6499da 41
ed92ac0c
JA
42 if (io_u->ddir == DDIR_SYNC)
43 return 0;
44 if (io_u->offset == f->last_completed_pos)
45 return 0;
46
e01547d2
JA
47 /*
48 * If offset is different from last end position, it's a seek.
49 * As network io is purely sequential, we don't allow seeks.
50 */
e1161c32 51 td_verror(td, EINVAL, "cannot seek");
ed92ac0c
JA
52 return 1;
53}
54
/*
 * Move up to 'len' bytes from fdin to fdout with splice(), looping until
 * the full length has been moved, splice() returns 0, or it fails.
 *
 * Returns the number of bytes moved. If the very first splice() call fails,
 * its negative return value is passed back instead; a failure after some
 * data has already been moved returns the partial count.
 */
static int splice_io_u(int fdin, int fdout, unsigned int len)
{
	int done = 0;

	while (len != 0) {
		int n = splice(fdin, NULL, fdout, NULL, len, 0);

		if (n == 0)
			return done;
		if (n < 0)
			return done ? done : n;

		done += n;
		len -= n;
	}

	return done;
}
76
77/*
cd963e18 78 * Receive bytes from a socket and fill them into the internal pipe
9cce02e8 79 */
cd963e18 80static int splice_in(struct thread_data *td, struct io_u *io_u)
9cce02e8
JA
81{
82 struct netio_data *nd = td->io_ops->data;
9cce02e8 83
cd963e18 84 return splice_io_u(io_u->file->fd, nd->pipes[1], io_u->xfer_buflen);
9cce02e8
JA
85}
86
87/*
cd963e18 88 * Transmit 'len' bytes from the internal pipe
9cce02e8 89 */
cd963e18
JA
90static int splice_out(struct thread_data *td, struct io_u *io_u,
91 unsigned int len)
9cce02e8
JA
92{
93 struct netio_data *nd = td->io_ops->data;
cd963e18
JA
94
95 return splice_io_u(nd->pipes[0], io_u->file->fd, len);
96}
97
98static int vmsplice_io_u(struct io_u *io_u, int fd, unsigned int len)
99{
9cce02e8
JA
100 struct iovec iov = {
101 .iov_base = io_u->xfer_buf,
102 .iov_len = len,
103 };
104 int bytes = 0;
105
106 while (iov.iov_len) {
cd963e18 107 int ret = vmsplice(fd, &iov, 1, SPLICE_F_MOVE);
9cce02e8
JA
108
109 if (ret < 0) {
110 if (!bytes)
111 bytes = ret;
112 break;
113 } else if (!ret)
114 break;
115
116 iov.iov_len -= ret;
cd963e18 117 iov.iov_base += ret;
f657a2fb 118 bytes += ret;
9cce02e8
JA
119 }
120
121 return bytes;
cd963e18 122
9cce02e8
JA
123}
124
125/*
cd963e18 126 * vmsplice() pipe to io_u buffer
9cce02e8 127 */
cd963e18
JA
128static int vmsplice_io_u_out(struct thread_data *td, struct io_u *io_u,
129 unsigned int len)
9cce02e8
JA
130{
131 struct netio_data *nd = td->io_ops->data;
9cce02e8 132
cd963e18
JA
133 return vmsplice_io_u(io_u, nd->pipes[0], len);
134}
9cce02e8 135
cd963e18
JA
136/*
137 * vmsplice() io_u to pipe
138 */
139static int vmsplice_io_u_in(struct thread_data *td, struct io_u *io_u)
140{
141 struct netio_data *nd = td->io_ops->data;
ed92ac0c 142
cd963e18 143 return vmsplice_io_u(io_u, nd->pipes[1], io_u->xfer_buflen);
9cce02e8
JA
144}
145
cd963e18
JA
/*
 * splice receive - first splice socket data into the internal pipe, then
 * vmsplice exactly that many bytes from the pipe into the io_u.
 *
 * Returns the result of the second step, or the result of the first step
 * unchanged when it moved nothing (0) or failed (negative).
 */
static int fio_netio_splice_in(struct thread_data *td, struct io_u *io_u)
{
	int got = splice_in(td, io_u);

	if (got <= 0)
		return got;

	return vmsplice_io_u_out(td, io_u, got);
}
160
cd963e18
JA
/*
 * splice transmit - first vmsplice the io_u data into the internal pipe,
 * then splice exactly that many bytes from the pipe to the socket.
 *
 * Returns the result of the second step, or the result of the first step
 * unchanged when it moved nothing (0) or failed (negative).
 */
static int fio_netio_splice_out(struct thread_data *td, struct io_u *io_u)
{
	int queued = vmsplice_io_u_in(td, io_u);

	if (queued <= 0)
		return queued;

	return splice_out(td, io_u, queued);
}
175
176static int fio_netio_send(struct thread_data *td, struct io_u *io_u)
177{
178 int flags = 0;
179
180 /*
181 * if we are going to write more, set MSG_MORE
182 */
183 if (td->this_io_bytes[DDIR_WRITE] + io_u->xfer_buflen < td->o.size)
184 flags = MSG_MORE;
185
186 return send(io_u->file->fd, io_u->xfer_buf, io_u->xfer_buflen, flags);
187}
188
189static int fio_netio_recv(struct io_u *io_u)
190{
191 int flags = MSG_WAITALL;
192
193 return recv(io_u->file->fd, io_u->xfer_buf, io_u->xfer_buflen, flags);
194}
195
196static int fio_netio_queue(struct thread_data *td, struct io_u *io_u)
197{
198 struct netio_data *nd = td->io_ops->data;
199 int ret;
200
201 if (io_u->ddir == DDIR_WRITE) {
202 if (nd->use_splice)
203 ret = fio_netio_splice_out(td, io_u);
204 else
205 ret = fio_netio_send(td, io_u);
d4f12dd0 206 } else if (io_u->ddir == DDIR_READ) {
9cce02e8
JA
207 if (nd->use_splice)
208 ret = fio_netio_splice_in(td, io_u);
209 else
210 ret = fio_netio_recv(io_u);
d4f12dd0 211 } else
7a6499da 212 ret = 0; /* must be a SYNC */
ed92ac0c 213
cec6b55d 214 if (ret != (int) io_u->xfer_buflen) {
22819ec2 215 if (ret >= 0) {
cec6b55d
JA
216 io_u->resid = io_u->xfer_buflen - ret;
217 io_u->error = 0;
36167d82 218 return FIO_Q_COMPLETED;
ed92ac0c
JA
219 } else
220 io_u->error = errno;
221 }
222
36167d82 223 if (io_u->error)
e1161c32 224 td_verror(td, io_u->error, "xfer");
ed92ac0c 225
36167d82 226 return FIO_Q_COMPLETED;
ed92ac0c
JA
227}
228
b5af8293 229static int fio_netio_connect(struct thread_data *td, struct fio_file *f)
ed92ac0c 230{
b5af8293 231 struct netio_data *nd = td->io_ops->data;
ed92ac0c 232
b5af8293
JA
233 f->fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
234 if (f->fd < 0) {
235 td_verror(td, errno, "socket");
236 return 1;
ed92ac0c
JA
237 }
238
b5af8293
JA
239 if (connect(f->fd, (struct sockaddr *) &nd->addr, sizeof(nd->addr)) < 0) {
240 td_verror(td, errno, "connect");
241 return 1;
ed92ac0c
JA
242 }
243
244 return 0;
ed92ac0c
JA
245}
246
b5af8293 247static int fio_netio_accept(struct thread_data *td, struct fio_file *f)
5fdd124a 248{
b5af8293
JA
249 struct netio_data *nd = td->io_ops->data;
250 socklen_t socklen = sizeof(nd->addr);
5fdd124a 251 struct pollfd pfd;
b5af8293 252 int ret;
5fdd124a 253
6d86144d 254 log_info("fio: waiting for connection\n");
5fdd124a
JA
255
256 /*
257 * Accept loop. poll for incoming events, accept them. Repeat until we
258 * have all connections.
259 */
b5af8293
JA
260 while (!td->terminate) {
261 pfd.fd = nd->listenfd;
5fdd124a
JA
262 pfd.events = POLLIN;
263
264 ret = poll(&pfd, 1, -1);
265 if (ret < 0) {
266 if (errno == EINTR)
267 continue;
268
e1161c32 269 td_verror(td, errno, "poll");
5fdd124a
JA
270 break;
271 } else if (!ret)
272 continue;
273
0c09442b
JA
274 /*
275 * should be impossible
276 */
277 if (!(pfd.revents & POLLIN))
278 continue;
279
b5af8293
JA
280 f->fd = accept(nd->listenfd, (struct sockaddr *) &nd->addr, &socklen);
281 if (f->fd < 0) {
282 td_verror(td, errno, "accept");
283 return 1;
284 }
285 break;
286 }
5fdd124a 287
b5af8293
JA
288 return 0;
289}
290
b5af8293
JA
/*
 * Open the "file" of a net job: a reading job accepts an incoming
 * connection, any other job connects out. Returns the result of the
 * helper that was called.
 */
static int fio_netio_open_file(struct thread_data *td, struct fio_file *f)
{
	int ret;

	if (td_read(td))
		ret = fio_netio_accept(td, f);
	else
		ret = fio_netio_connect(td, f);

	return ret;
}
298
299static int fio_netio_setup_connect(struct thread_data *td, const char *host,
300 unsigned short port)
301{
302 struct netio_data *nd = td->io_ops->data;
303
304 nd->addr.sin_family = AF_INET;
305 nd->addr.sin_port = htons(port);
306
307 if (inet_aton(host, &nd->addr.sin_addr) != 1) {
308 struct hostent *hent;
309
310 hent = gethostbyname(host);
311 if (!hent) {
312 td_verror(td, errno, "gethostbyname");
313 return 1;
5fdd124a 314 }
b5af8293
JA
315
316 memcpy(&nd->addr.sin_addr, hent->h_addr, 4);
5fdd124a
JA
317 }
318
319 return 0;
320}
321
b5af8293 322static int fio_netio_setup_listen(struct thread_data *td, short port)
ed92ac0c 323{
b5af8293 324 struct netio_data *nd = td->io_ops->data;
5fdd124a 325 int fd, opt;
ed92ac0c 326
6bedbfaf 327 fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
ed92ac0c 328 if (fd < 0) {
e1161c32 329 td_verror(td, errno, "socket");
ed92ac0c
JA
330 return 1;
331 }
332
333 opt = 1;
334 if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) < 0) {
e1161c32 335 td_verror(td, errno, "setsockopt");
ed92ac0c
JA
336 return 1;
337 }
6bedbfaf
JA
338#ifdef SO_REUSEPORT
339 if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)) < 0) {
e1161c32 340 td_verror(td, errno, "setsockopt");
6bedbfaf
JA
341 return 1;
342 }
343#endif
ed92ac0c 344
b5af8293
JA
345 nd->addr.sin_family = AF_INET;
346 nd->addr.sin_addr.s_addr = htonl(INADDR_ANY);
347 nd->addr.sin_port = htons(port);
ed92ac0c 348
b5af8293 349 if (bind(fd, (struct sockaddr *) &nd->addr, sizeof(nd->addr)) < 0) {
e1161c32 350 td_verror(td, errno, "bind");
ed92ac0c
JA
351 return 1;
352 }
353 if (listen(fd, 1) < 0) {
e1161c32 354 td_verror(td, errno, "listen");
ed92ac0c
JA
355 return 1;
356 }
357
b5af8293
JA
358 nd->listenfd = fd;
359 return 0;
ed92ac0c
JA
360}
361
9bec88e1 362static int fio_netio_init(struct thread_data *td)
ed92ac0c 363{
b5af8293 364 struct netio_data *nd = td->io_ops->data;
e01547d2 365 unsigned short port;
b5af8293 366 char host[64], buf[128];
ed92ac0c 367 char *sep;
af52b345 368 int ret;
ed92ac0c 369
413dd459 370 if (td_rw(td)) {
ed92ac0c
JA
371 log_err("fio: network connections must be read OR write\n");
372 return 1;
373 }
16d55aae
JA
374 if (td_random(td)) {
375 log_err("fio: network IO can't be random\n");
376 return 1;
377 }
ed92ac0c 378
2dc1bbeb 379 strcpy(buf, td->o.filename);
ed92ac0c 380
9f9214f2 381 sep = strchr(buf, '/');
ed92ac0c 382 if (!sep) {
2dc1bbeb 383 log_err("fio: bad network host/port <<%s>>\n", td->o.filename);
ed92ac0c
JA
384 return 1;
385 }
386
387 *sep = '\0';
388 sep++;
389 strcpy(host, buf);
e01547d2 390 port = atoi(sep);
ed92ac0c 391
413dd459 392 if (td_read(td)) {
b5af8293 393 nd->send_to_net = 0;
ed92ac0c
JA
394 ret = fio_netio_setup_listen(td, port);
395 } else {
b5af8293 396 nd->send_to_net = 1;
ed92ac0c
JA
397 ret = fio_netio_setup_connect(td, host, port);
398 }
399
7bb48f84 400 return ret;
ed92ac0c
JA
401}
402
b5af8293 403static void fio_netio_cleanup(struct thread_data *td)
9bec88e1 404{
b5af8293
JA
405 struct netio_data *nd = td->io_ops->data;
406
407 if (nd) {
64b24cd8
JA
408 if (nd->listenfd != -1)
409 close(nd->listenfd);
410 if (nd->pipes[0] != -1)
411 close(nd->pipes[0]);
412 if (nd->pipes[1] != -1)
413 close(nd->pipes[1]);
414
b5af8293
JA
415 free(nd);
416 td->io_ops->data = NULL;
417 }
418}
419
420static int fio_netio_setup(struct thread_data *td)
421{
7bb48f84 422 struct netio_data *nd;
7bb48f84
JA
423
424 if (!td->io_ops->data) {
425 nd = malloc(sizeof(*nd));;
426
427 memset(nd, 0, sizeof(*nd));
428 nd->listenfd = -1;
64b24cd8 429 nd->pipes[0] = nd->pipes[1] = -1;
7bb48f84 430 td->io_ops->data = nd;
7bb48f84 431 }
b5af8293 432
9bec88e1
JA
433 return 0;
434}
435
9cce02e8
JA
436static int fio_netio_setup_splice(struct thread_data *td)
437{
438 struct netio_data *nd;
439
440 fio_netio_setup(td);
441
442 nd = td->io_ops->data;
443 if (nd) {
444 if (pipe(nd->pipes) < 0)
445 return 1;
446
447 nd->use_splice = 1;
448 return 0;
449 }
450
451 return 1;
452}
453
454static struct ioengine_ops ioengine_rw = {
ed92ac0c
JA
455 .name = "net",
456 .version = FIO_IOOPS_VERSION,
ed92ac0c
JA
457 .prep = fio_netio_prep,
458 .queue = fio_netio_queue,
ed92ac0c 459 .setup = fio_netio_setup,
9bec88e1 460 .init = fio_netio_init,
b5af8293
JA
461 .cleanup = fio_netio_cleanup,
462 .open_file = fio_netio_open_file,
463 .close_file = generic_close_file,
dc372f07 464 .flags = FIO_SYNCIO | FIO_DISKLESSIO | FIO_UNIDIR,
ed92ac0c
JA
465};
466
9cce02e8
JA
467static struct ioengine_ops ioengine_splice = {
468 .name = "netsplice",
469 .version = FIO_IOOPS_VERSION,
470 .prep = fio_netio_prep,
471 .queue = fio_netio_queue,
472 .setup = fio_netio_setup_splice,
473 .init = fio_netio_init,
474 .cleanup = fio_netio_cleanup,
475 .open_file = fio_netio_open_file,
476 .close_file = generic_close_file,
b67740d3 477 .flags = FIO_SYNCIO | FIO_DISKLESSIO | FIO_UNIDIR,
9cce02e8
JA
478};
479
ed92ac0c
JA
480static void fio_init fio_netio_register(void)
481{
9cce02e8
JA
482 register_ioengine(&ioengine_rw);
483 register_ioengine(&ioengine_splice);
ed92ac0c
JA
484}
485
486static void fio_exit fio_netio_unregister(void)
487{
9cce02e8
JA
488 unregister_ioengine(&ioengine_rw);
489 unregister_ioengine(&ioengine_splice);
ed92ac0c 490}