server: error handling and probe command
[fio.git] / server.c
CommitLineData
50d16976
JA
1#include <stdio.h>
2#include <stdlib.h>
142575e6 3#include <stdarg.h>
50d16976
JA
4#include <unistd.h>
5#include <limits.h>
50d16976
JA
6#include <errno.h>
7#include <fcntl.h>
8#include <sys/poll.h>
50d16976
JA
9#include <sys/types.h>
10#include <sys/wait.h>
50d16976 11#include <sys/mman.h>
50d16976
JA
12#include <netinet/in.h>
13#include <arpa/inet.h>
14#include <netdb.h>
e46d8091 15#include <syslog.h>
50d16976
JA
16
17#include "fio.h"
132159a5 18#include "server.h"
fcee5ff6 19#include "crc/crc16.h"
50d16976 20
/* TCP port the server socket is bound to (see the bind() setup in fio_server()). */
132159a5 21int fio_net_port = 8765;
50d16976 22
009b1be4
JA
/*
 * Set to 1 when a FIO_NET_CMD_EXIT command is handled; the send/recv,
 * connection and accept loops in this file stop once it is non-zero.
 */
23int exit_backend = 0;
24
/*
 * Socket of the currently connected client, or -1 when there is none.
 * Assigned by accept_loop() around handle_connection().
 */
46c48f1f 25static int server_fd = -1;
37db14fe 26
132159a5
JA
27int fio_send_data(int sk, const void *p, unsigned int len)
28{
794d69ca
JA
29 assert(len <= sizeof(struct fio_net_cmd) + FIO_SERVER_MAX_PDU);
30
132159a5
JA
31 do {
32 int ret = send(sk, p, len, 0);
33
34 if (ret > 0) {
35 len -= ret;
36 if (!len)
37 break;
38 p += ret;
39 continue;
40 } else if (!ret)
41 break;
42 else if (errno == EAGAIN || errno == EINTR)
43 continue;
44 } while (!exit_backend);
45
46 if (!len)
47 return 0;
48
49 return 1;
50}
51
52int fio_recv_data(int sk, void *p, unsigned int len)
53{
54 do {
55 int ret = recv(sk, p, len, MSG_WAITALL);
56
57 if (ret > 0) {
58 len -= ret;
59 if (!len)
60 break;
61 p += ret;
62 continue;
63 } else if (!ret)
64 break;
65 else if (errno == EAGAIN || errno == EINTR)
66 continue;
67 } while (!exit_backend);
68
69 if (!len)
70 return 0;
71
72 return -1;
73}
74
75static int verify_convert_cmd(struct fio_net_cmd *cmd)
76{
fcee5ff6 77 uint16_t crc;
132159a5 78
fcee5ff6
JA
79 cmd->cmd_crc16 = le16_to_cpu(cmd->cmd_crc16);
80 cmd->pdu_crc16 = le16_to_cpu(cmd->pdu_crc16);
132159a5 81
fcee5ff6
JA
82 crc = crc16(cmd, FIO_NET_CMD_CRC_SZ);
83 if (crc != cmd->cmd_crc16) {
132159a5 84 log_err("fio: server bad crc on command (got %x, wanted %x)\n",
fcee5ff6 85 cmd->cmd_crc16, crc);
132159a5
JA
86 return 1;
87 }
88
89 cmd->version = le16_to_cpu(cmd->version);
90 cmd->opcode = le16_to_cpu(cmd->opcode);
91 cmd->flags = le32_to_cpu(cmd->flags);
92 cmd->serial = le64_to_cpu(cmd->serial);
93 cmd->pdu_len = le32_to_cpu(cmd->pdu_len);
94
95 switch (cmd->version) {
96 case FIO_SERVER_VER1:
97 break;
98 default:
99 log_err("fio: bad server cmd version %d\n", cmd->version);
100 return 1;
101 }
102
103 if (cmd->pdu_len > FIO_SERVER_MAX_PDU) {
104 log_err("fio: command payload too large: %u\n", cmd->pdu_len);
105 return 1;
106 }
107
108 return 0;
109}
110
a64e88da
JA
111/*
112 * Read (and defragment, if necessary) incoming commands
113 */
114struct fio_net_cmd *fio_net_recv_cmd(int sk)
132159a5 115{
a64e88da
JA
116 struct fio_net_cmd cmd, *cmdret = NULL;
117 size_t cmd_size = 0, pdu_offset = 0;
fcee5ff6 118 uint16_t crc;
a64e88da
JA
119 int ret, first = 1;
120 void *pdu = NULL;
132159a5 121
a64e88da 122 do {
cc0df00a
JA
123 struct pollfd pfd;
124
125 pfd.fd = sk;
126 pfd.events = POLLIN;
127 ret = 0;
128 do {
129 ret = poll(&pfd, 1, 100);
130 if (ret < 0) {
5fc58b05
JA
131 if (errno == EINTR)
132 break;
cc0df00a
JA
133 log_err("fio: poll: %s\n", strerror(errno));
134 break;
135 } else if (!ret)
136 continue;
137
138 if (pfd.revents & POLLIN)
139 break;
140 if (pfd.revents & (POLLERR|POLLHUP)) {
141 ret = 1;
142 break;
143 }
144 } while (ret >= 0);
145
146 if (ret < 0)
147 break;
148
a64e88da
JA
149 ret = fio_recv_data(sk, &cmd, sizeof(cmd));
150 if (ret)
151 break;
132159a5 152
a64e88da
JA
153 /* We have a command, verify it and swap if need be */
154 ret = verify_convert_cmd(&cmd);
155 if (ret)
156 break;
132159a5 157
a64e88da
JA
158 if (first)
159 cmd_size = sizeof(cmd) + cmd.pdu_len;
160 else
161 cmd_size += cmd.pdu_len;
132159a5 162
a64e88da 163 cmdret = realloc(cmdret, cmd_size);
132159a5 164
a64e88da
JA
165 if (first)
166 memcpy(cmdret, &cmd, sizeof(cmd));
167 else
168 assert(cmdret->opcode == cmd.opcode);
169
170 if (!cmd.pdu_len)
171 break;
172
173 /* There's payload, get it */
174 pdu = (void *) cmdret->payload + pdu_offset;
175 ret = fio_recv_data(sk, pdu, cmd.pdu_len);
176 if (ret)
177 break;
178
179 /* Verify payload crc */
180 crc = crc16(pdu, cmd.pdu_len);
181 if (crc != cmd.pdu_crc16) {
182 log_err("fio: server bad crc on payload ");
183 log_err("(got %x, wanted %x)\n", cmd.pdu_crc16, crc);
184 ret = 1;
185 break;
186 }
187
188 pdu_offset += cmd.pdu_len;
817f06bb
JA
189 if (!first)
190 cmdret->pdu_len += cmd.pdu_len;
a64e88da
JA
191 first = 0;
192 } while (cmd.flags & FIO_NET_CMD_F_MORE);
132159a5 193
a64e88da
JA
194 if (ret) {
195 free(cmdret);
196 cmdret = NULL;
cc0df00a 197 } else if (cmdret)
a64e88da
JA
198 cmdret->flags &= ~FIO_NET_CMD_F_MORE;
199
200 return cmdret;
132159a5
JA
201}
202
203void fio_net_cmd_crc(struct fio_net_cmd *cmd)
204{
205 uint32_t pdu_len;
206
ddcc0b69 207 cmd->cmd_crc16 = __cpu_to_le16(crc16(cmd, FIO_NET_CMD_CRC_SZ));
132159a5
JA
208
209 pdu_len = le32_to_cpu(cmd->pdu_len);
210 if (pdu_len)
ddcc0b69 211 cmd->pdu_crc16 = __cpu_to_le16(crc16(cmd->payload, pdu_len));
132159a5
JA
212}
213
a64e88da 214int fio_net_send_cmd(int fd, uint16_t opcode, const void *buf, off_t size)
794d69ca
JA
215{
216 struct fio_net_cmd *cmd;
217 size_t this_len;
218 int ret;
219
220 do {
221 this_len = size;
222 if (this_len > FIO_SERVER_MAX_PDU)
223 this_len = FIO_SERVER_MAX_PDU;
224
225 cmd = malloc(sizeof(*cmd) + this_len);
226
227 fio_init_net_cmd(cmd, opcode, buf, this_len);
228
229 if (this_len < size)
ddcc0b69 230 cmd->flags = __cpu_to_le32(FIO_NET_CMD_F_MORE);
794d69ca
JA
231
232 fio_net_cmd_crc(cmd);
233
234 ret = fio_send_data(fd, cmd, sizeof(*cmd) + this_len);
235 free(cmd);
236 size -= this_len;
237 buf += this_len;
238 } while (!ret && size);
239
240 return ret;
241}
242
cc0df00a 243int fio_net_send_simple_cmd(int sk, uint16_t opcode, uint64_t serial)
132159a5
JA
244{
245 struct fio_net_cmd cmd = {
ddcc0b69 246 .version = __cpu_to_le16(FIO_SERVER_VER1),
132159a5 247 .opcode = cpu_to_le16(opcode),
bdab4441 248 .serial = cpu_to_le64(serial),
132159a5
JA
249 };
250
251 fio_net_cmd_crc(&cmd);
252
253 return fio_send_data(sk, &cmd, sizeof(cmd));
254}
255
437377e1
JA
256static int send_quit_command(void)
257{
46c48f1f 258 dprint(FD_NET, "server: sending quit\n");
cc0df00a 259 return fio_net_send_simple_cmd(server_fd, FIO_NET_CMD_QUIT, 0);
437377e1
JA
260}
261
794d69ca 262static int handle_cur_job(struct fio_net_cmd *cmd)
132159a5 263{
a64e88da
JA
264 void *buf = cmd->payload;
265 int ret;
132159a5 266
a64e88da 267 parse_jobs_ini(buf, 1, 0);
794d69ca
JA
268 ret = exec_run();
269 send_quit_command();
270 reset_fio_state();
132159a5
JA
271 return ret;
272}
273
c28e8e8c
JA
274static int handle_probe_cmd(struct fio_net_cmd *cmd)
275{
276 struct cmd_probe_pdu probe;
277
278 memset(&probe, 0, sizeof(probe));
279 gethostname((char *) probe.hostname, sizeof(probe.hostname));
280 probe.fio_major = 1;
281 probe.fio_minor = 58;
282 probe.fio_patch = 0;
283
284 return fio_net_send_cmd(server_fd, FIO_NET_CMD_PROBE, &probe, sizeof(probe));
285}
286
132159a5
JA
287static int handle_command(struct fio_net_cmd *cmd)
288{
289 int ret;
290
46c48f1f
JA
291 dprint(FD_NET, "server: got opcode %d\n", cmd->opcode);
292
132159a5
JA
293 switch (cmd->opcode) {
294 case FIO_NET_CMD_QUIT:
cc0df00a 295 fio_terminate_threads(TERMINATE_ALL);
c28e8e8c 296 return -1;
d7959186 297 case FIO_NET_CMD_EXIT:
132159a5 298 exit_backend = 1;
c28e8e8c 299 return -1;
132159a5 300 case FIO_NET_CMD_JOB:
794d69ca 301 ret = handle_cur_job(cmd);
132159a5 302 break;
c28e8e8c
JA
303 case FIO_NET_CMD_PROBE:
304 ret = handle_probe_cmd(cmd);
305 break;
132159a5
JA
306 default:
307 log_err("fio: unknown opcode: %d\n", cmd->opcode);
308 ret = 1;
309 }
310
311 return ret;
312}
313
314static int handle_connection(int sk)
315{
316 struct fio_net_cmd *cmd = NULL;
317 int ret = 0;
318
319 /* read forever */
320 while (!exit_backend) {
a64e88da 321 cmd = fio_net_recv_cmd(sk);
132159a5 322 if (!cmd) {
c28e8e8c 323 ret = -1;
132159a5
JA
324 break;
325 }
326
132159a5
JA
327 ret = handle_command(cmd);
328 if (ret)
329 break;
330
331 free(cmd);
c77a99e7 332 cmd = NULL;
132159a5
JA
333 }
334
335 if (cmd)
336 free(cmd);
337
338 return ret;
339}
340
cc0df00a
JA
341void fio_server_idle_loop(void)
342{
343 if (server_fd != -1)
344 handle_connection(server_fd);
345}
346
50d16976
JA
347static int accept_loop(int listen_sk)
348{
349 struct sockaddr addr;
350 unsigned int len = sizeof(addr);
009b1be4 351 struct pollfd pfd;
132159a5 352 int ret, sk, flags, exitval = 0;
50d16976 353
009b1be4
JA
354 flags = fcntl(listen_sk, F_GETFL);
355 flags |= O_NONBLOCK;
356 fcntl(listen_sk, F_SETFL, flags);
50d16976 357again:
009b1be4
JA
358 pfd.fd = listen_sk;
359 pfd.events = POLLIN;
360 do {
361 ret = poll(&pfd, 1, 100);
362 if (ret < 0) {
363 if (errno == EINTR)
364 break;
fcee5ff6 365 log_err("fio: poll: %s\n", strerror(errno));
009b1be4
JA
366 goto out;
367 } else if (!ret)
368 continue;
369
370 if (pfd.revents & POLLIN)
371 break;
372 } while (!exit_backend);
373
374 if (exit_backend)
375 goto out;
376
50d16976 377 sk = accept(listen_sk, &addr, &len);
50d16976 378 if (sk < 0) {
690e09ae 379 log_err("fio: accept: %s\n", strerror(errno));
50d16976
JA
380 return -1;
381 }
382
46c48f1f
JA
383 dprint(FD_NET, "server got a connection\n");
384
37db14fe
JA
385 server_fd = sk;
386
132159a5 387 exitval = handle_connection(sk);
50d16976 388
37db14fe 389 server_fd = -1;
50d16976 390 close(sk);
5c341e9a 391
009b1be4 392 if (!exit_backend)
5c341e9a
JA
393 goto again;
394
009b1be4 395out:
132159a5 396 return exitval;
50d16976
JA
397}
398
e46d8091 399static int fio_server(void)
50d16976
JA
400{
401 struct sockaddr_in saddr_in;
402 struct sockaddr addr;
403 unsigned int len;
afcf7758 404 int sk, opt, ret;
50d16976 405
46c48f1f
JA
406 dprint(FD_NET, "starting server\n");
407
50d16976
JA
408 sk = socket(AF_INET, SOCK_STREAM, 0);
409 if (sk < 0) {
690e09ae 410 log_err("fio: socket: %s\n", strerror(errno));
50d16976
JA
411 return -1;
412 }
413
414 opt = 1;
415 if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) < 0) {
690e09ae 416 log_err("fio: setsockopt: %s\n", strerror(errno));
50d16976
JA
417 return -1;
418 }
419#ifdef SO_REUSEPORT
420 if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)) < 0) {
690e09ae 421 log_err("fio: setsockopt: %s\n", strerror(errno));
c28e8e8c 422 return -1;
50d16976
JA
423 }
424#endif
425
426 saddr_in.sin_family = AF_INET;
427 saddr_in.sin_addr.s_addr = htonl(INADDR_ANY);
132159a5 428 saddr_in.sin_port = htons(fio_net_port);
50d16976
JA
429
430 if (bind(sk, (struct sockaddr *) &saddr_in, sizeof(saddr_in)) < 0) {
690e09ae 431 log_err("fio: bind: %s\n", strerror(errno));
50d16976
JA
432 return -1;
433 }
434
435 if (listen(sk, 1) < 0) {
690e09ae 436 log_err("fio: listen: %s\n", strerror(errno));
50d16976
JA
437 return -1;
438 }
439
440 len = sizeof(addr);
441 if (getsockname(sk, &addr, &len) < 0) {
690e09ae 442 log_err("fio: getsockname: %s\n", strerror(errno));
50d16976
JA
443 return -1;
444 }
445
afcf7758
JA
446 ret = accept_loop(sk);
447 close(sk);
448 return ret;
50d16976 449}
37db14fe 450
142575e6 451int fio_server_text_output(const char *buf, unsigned int len)
37db14fe 452{
337d75a8
JA
453 if (server_fd != -1)
454 return fio_net_send_cmd(server_fd, FIO_NET_CMD_TEXT, buf, len);
455
456 return 0;
142575e6
JA
457}
458
a64e88da
JA
459static void convert_io_stat(struct io_stat *dst, struct io_stat *src)
460{
461 dst->max_val = cpu_to_le64(src->max_val);
462 dst->min_val = cpu_to_le64(src->min_val);
463 dst->samples = cpu_to_le64(src->samples);
464 /* FIXME */
ddcc0b69
JA
465 dst->mean = __cpu_to_le64(src->mean);
466 dst->S = __cpu_to_le64(src->S);
a64e88da
JA
467}
468
469static void convert_gs(struct group_run_stats *dst, struct group_run_stats *src)
470{
471 int i;
472
473 for (i = 0; i < 2; i++) {
474 dst->max_run[i] = cpu_to_le64(src->max_run[i]);
475 dst->min_run[i] = cpu_to_le64(src->min_run[i]);
476 dst->max_bw[i] = cpu_to_le64(src->max_bw[i]);
477 dst->min_bw[i] = cpu_to_le64(src->min_bw[i]);
478 dst->io_kb[i] = cpu_to_le64(src->io_kb[i]);
479 dst->agg[i] = cpu_to_le64(src->agg[i]);
480 }
481
482 dst->kb_base = cpu_to_le32(src->kb_base);
483 dst->groupid = cpu_to_le32(src->groupid);
484}
485
486/*
487 * Send a CMD_TS, which packs struct thread_stat and group_run_stats
488 * into a single payload.
489 */
490void fio_server_send_ts(struct thread_stat *ts, struct group_run_stats *rs)
491{
492 struct cmd_ts_pdu p;
493 int i, j;
494
317b3c8b
JA
495 memset(&p, 0, sizeof(p));
496
a64e88da
JA
497 strcpy(p.ts.name, ts->name);
498 strcpy(p.ts.verror, ts->verror);
499 strcpy(p.ts.description, ts->description);
500
ddcc0b69 501 p.ts.error = cpu_to_le32(ts->error);
a64e88da 502 p.ts.groupid = cpu_to_le32(ts->groupid);
ddcc0b69 503 p.ts.pid = cpu_to_le32(ts->pid);
a64e88da
JA
504 p.ts.members = cpu_to_le32(ts->members);
505
506 for (i = 0; i < 2; i++) {
507 convert_io_stat(&p.ts.clat_stat[i], &ts->clat_stat[i]);
508 convert_io_stat(&p.ts.slat_stat[i], &ts->slat_stat[i]);
509 convert_io_stat(&p.ts.lat_stat[i], &ts->lat_stat[i]);
510 convert_io_stat(&p.ts.bw_stat[i], &ts->bw_stat[i]);
511 }
512
513 p.ts.usr_time = cpu_to_le64(ts->usr_time);
514 p.ts.sys_time = cpu_to_le64(ts->sys_time);
515 p.ts.ctx = cpu_to_le64(ts->ctx);
516 p.ts.minf = cpu_to_le64(ts->minf);
517 p.ts.majf = cpu_to_le64(ts->majf);
518 p.ts.clat_percentiles = cpu_to_le64(ts->clat_percentiles);
519 p.ts.percentile_list = NULL;
520
521 for (i = 0; i < FIO_IO_U_MAP_NR; i++) {
522 p.ts.io_u_map[i] = cpu_to_le32(ts->io_u_map[i]);
523 p.ts.io_u_submit[i] = cpu_to_le32(ts->io_u_submit[i]);
524 p.ts.io_u_complete[i] = cpu_to_le32(ts->io_u_complete[i]);
525 }
526
527 for (i = 0; i < FIO_IO_U_LAT_U_NR; i++) {
528 p.ts.io_u_lat_u[i] = cpu_to_le32(ts->io_u_lat_u[i]);
529 p.ts.io_u_lat_m[i] = cpu_to_le32(ts->io_u_lat_m[i]);
530 }
531
532 for (i = 0; i < 2; i++)
533 for (j = 0; j < FIO_IO_U_PLAT_NR; j++)
534 p.ts.io_u_plat[i][j] = cpu_to_le32(ts->io_u_plat[i][j]);
535
536 for (i = 0; i < 3; i++) {
537 p.ts.total_io_u[i] = cpu_to_le64(ts->total_io_u[i]);
93eee04a 538 p.ts.short_io_u[i] = cpu_to_le64(ts->short_io_u[i]);
a64e88da
JA
539 }
540
93eee04a 541 p.ts.total_submit = cpu_to_le64(ts->total_submit);
a64e88da
JA
542 p.ts.total_complete = cpu_to_le64(ts->total_complete);
543
544 for (i = 0; i < 2; i++) {
545 p.ts.io_bytes[i] = cpu_to_le64(ts->io_bytes[i]);
546 p.ts.runtime[i] = cpu_to_le64(ts->runtime[i]);
547 }
548
549 p.ts.total_run_time = cpu_to_le64(ts->total_run_time);
550 p.ts.continue_on_error = cpu_to_le16(ts->continue_on_error);
551 p.ts.total_err_count = cpu_to_le64(ts->total_err_count);
ddcc0b69
JA
552 p.ts.first_error = cpu_to_le32(ts->first_error);
553 p.ts.kb_base = cpu_to_le32(ts->kb_base);
a64e88da
JA
554
555 convert_gs(&p.rs, rs);
556
557 fio_net_send_cmd(server_fd, FIO_NET_CMD_TS, &p, sizeof(p));
558}
559
560void fio_server_send_gs(struct group_run_stats *rs)
561{
562 struct group_run_stats gs;
563
564 convert_gs(&gs, rs);
565 fio_net_send_cmd(server_fd, FIO_NET_CMD_GS, &gs, sizeof(gs));
566}
567
cf451d1e
JA
568void fio_server_send_status(void)
569{
1d1f45ae
JA
570 struct jobs_eta *je;
571 size_t size;
572 void *buf;
cf451d1e
JA
573 int i;
574
1d1f45ae
JA
575 size = sizeof(*je) + thread_number * sizeof(char);
576 buf = malloc(size);
577 memset(buf, 0, size);
578 je = buf;
579
580 if (!calc_thread_status(je)) {
581 free(je);
cf451d1e 582 return;
1d1f45ae 583 }
cf451d1e 584
1d1f45ae
JA
585 je->nr_running = cpu_to_le32(je->nr_running);
586 je->nr_ramp = cpu_to_le32(je->nr_ramp);
587 je->nr_pending = cpu_to_le32(je->nr_pending);
588 je->files_open = cpu_to_le32(je->files_open);
589 je->m_rate = cpu_to_le32(je->m_rate);
590 je->t_rate = cpu_to_le32(je->t_rate);
591 je->m_iops = cpu_to_le32(je->m_iops);
592 je->t_iops = cpu_to_le32(je->t_iops);
cf451d1e
JA
593
594 for (i = 0; i < 2; i++) {
1d1f45ae
JA
595 je->rate[i] = cpu_to_le32(je->rate[i]);
596 je->iops[i] = cpu_to_le32(je->iops[i]);
cf451d1e
JA
597 }
598
1d1f45ae
JA
599 je->elapsed_sec = cpu_to_le32(je->nr_running);
600 je->eta_sec = cpu_to_le64(je->eta_sec);
cf451d1e 601
1d1f45ae
JA
602 fio_net_send_cmd(server_fd, FIO_NET_CMD_ETA, buf, size);
603 free(je);
cf451d1e
JA
604}
605
142575e6
JA
/*
 * printf-style logging to the connected client.
 *
 * The message is formatted into a 1024 byte buffer (longer output is
 * truncated) and passed to fio_server_text_output().
 *
 * Returns the fio_server_text_output() result, or 1 if formatting failed.
 */
int fio_server_log(const char *format, ...)
{
	char buffer[1024];
	va_list args;
	size_t len;
	int ret;

	va_start(args, format);
	ret = vsnprintf(buffer, sizeof(buffer), format, args);
	va_end(args);

	/* a negative result must not be turned into a huge size_t */
	if (ret < 0)
		return 1;

	/*
	 * vsnprintf() returns the length the full output would have had;
	 * clamp it to what actually fits in the buffer.
	 */
	len = ret;
	if (len >= sizeof(buffer))
		len = sizeof(buffer) - 1;

	return fio_server_text_output(buffer, len);
}
e46d8091
JA
618
619int fio_start_server(int daemonize)
620{
621 pid_t pid;
622
623 if (!daemonize)
624 return fio_server();
625
626 openlog("fio", LOG_NDELAY|LOG_NOWAIT|LOG_PID, LOG_USER);
627 pid = fork();
628 if (pid < 0) {
629 syslog(LOG_ERR, "failed server fork");
c28e8e8c 630 return -1;
e46d8091
JA
631 } else if (pid)
632 exit(0);
633
634 setsid();
635 close(STDIN_FILENO);
636 close(STDOUT_FILENO);
637 close(STDERR_FILENO);
638 f_out = NULL;
639 f_err = NULL;
640 log_syslog = 1;
641 return fio_server();
642}