client: handle connection failure
[fio.git] / server.c
... / ...
CommitLineData
1#include <stdio.h>
2#include <stdlib.h>
3#include <stdarg.h>
4#include <unistd.h>
5#include <limits.h>
6#include <errno.h>
7#include <fcntl.h>
8#include <sys/poll.h>
9#include <sys/types.h>
10#include <sys/wait.h>
11#include <sys/mman.h>
12#include <netinet/in.h>
13#include <arpa/inet.h>
14#include <netdb.h>
15#include <syslog.h>
16
17#include "fio.h"
18#include "server.h"
19#include "crc/crc16.h"
20
/* TCP port the server binds its listening socket to. */
int fio_net_port = 8765;

/*
 * Checked by the send/receive, connection and accept loops in this file;
 * once non-zero they stop iterating. Set when an EXIT command is handled.
 */
int exit_backend = 0;

/* Socket of the client currently being served, or -1 when there is none. */
static int server_fd = -1;
26
27int fio_send_data(int sk, const void *p, unsigned int len)
28{
29 assert(len <= sizeof(struct fio_net_cmd) + FIO_SERVER_MAX_PDU);
30
31 do {
32 int ret = send(sk, p, len, 0);
33
34 if (ret > 0) {
35 len -= ret;
36 if (!len)
37 break;
38 p += ret;
39 continue;
40 } else if (!ret)
41 break;
42 else if (errno == EAGAIN || errno == EINTR)
43 continue;
44 } while (!exit_backend);
45
46 if (!len)
47 return 0;
48
49 return 1;
50}
51
52int fio_recv_data(int sk, void *p, unsigned int len)
53{
54 do {
55 int ret = recv(sk, p, len, MSG_WAITALL);
56
57 if (ret > 0) {
58 len -= ret;
59 if (!len)
60 break;
61 p += ret;
62 continue;
63 } else if (!ret)
64 break;
65 else if (errno == EAGAIN || errno == EINTR)
66 continue;
67 } while (!exit_backend);
68
69 if (!len)
70 return 0;
71
72 return -1;
73}
74
75static int verify_convert_cmd(struct fio_net_cmd *cmd)
76{
77 uint16_t crc;
78
79 cmd->cmd_crc16 = le16_to_cpu(cmd->cmd_crc16);
80 cmd->pdu_crc16 = le16_to_cpu(cmd->pdu_crc16);
81
82 crc = crc16(cmd, FIO_NET_CMD_CRC_SZ);
83 if (crc != cmd->cmd_crc16) {
84 log_err("fio: server bad crc on command (got %x, wanted %x)\n",
85 cmd->cmd_crc16, crc);
86 return 1;
87 }
88
89 cmd->version = le16_to_cpu(cmd->version);
90 cmd->opcode = le16_to_cpu(cmd->opcode);
91 cmd->flags = le32_to_cpu(cmd->flags);
92 cmd->serial = le64_to_cpu(cmd->serial);
93 cmd->pdu_len = le32_to_cpu(cmd->pdu_len);
94
95 switch (cmd->version) {
96 case FIO_SERVER_VER1:
97 break;
98 default:
99 log_err("fio: bad server cmd version %d\n", cmd->version);
100 return 1;
101 }
102
103 if (cmd->pdu_len > FIO_SERVER_MAX_PDU) {
104 log_err("fio: command payload too large: %u\n", cmd->pdu_len);
105 return 1;
106 }
107
108 return 0;
109}
110
111/*
112 * Read (and defragment, if necessary) incoming commands
113 */
114struct fio_net_cmd *fio_net_recv_cmd(int sk)
115{
116 struct fio_net_cmd cmd, *cmdret = NULL;
117 size_t cmd_size = 0, pdu_offset = 0;
118 uint16_t crc;
119 int ret, first = 1;
120 void *pdu = NULL;
121
122 do {
123 struct pollfd pfd;
124
125 pfd.fd = sk;
126 pfd.events = POLLIN;
127 ret = 0;
128 do {
129 ret = poll(&pfd, 1, 100);
130 if (ret < 0) {
131 if (errno == EINTR)
132 break;
133 log_err("fio: poll: %s\n", strerror(errno));
134 break;
135 } else if (!ret)
136 continue;
137
138 if (pfd.revents & POLLIN)
139 break;
140 if (pfd.revents & (POLLERR|POLLHUP)) {
141 ret = 1;
142 break;
143 }
144 } while (ret >= 0);
145
146 if (ret < 0)
147 break;
148
149 ret = fio_recv_data(sk, &cmd, sizeof(cmd));
150 if (ret)
151 break;
152
153 /* We have a command, verify it and swap if need be */
154 ret = verify_convert_cmd(&cmd);
155 if (ret)
156 break;
157
158 if (first)
159 cmd_size = sizeof(cmd) + cmd.pdu_len;
160 else
161 cmd_size += cmd.pdu_len;
162
163 cmdret = realloc(cmdret, cmd_size);
164
165 if (first)
166 memcpy(cmdret, &cmd, sizeof(cmd));
167 else
168 assert(cmdret->opcode == cmd.opcode);
169
170 if (!cmd.pdu_len)
171 break;
172
173 /* There's payload, get it */
174 pdu = (void *) cmdret->payload + pdu_offset;
175 ret = fio_recv_data(sk, pdu, cmd.pdu_len);
176 if (ret)
177 break;
178
179 /* Verify payload crc */
180 crc = crc16(pdu, cmd.pdu_len);
181 if (crc != cmd.pdu_crc16) {
182 log_err("fio: server bad crc on payload ");
183 log_err("(got %x, wanted %x)\n", cmd.pdu_crc16, crc);
184 ret = 1;
185 break;
186 }
187
188 pdu_offset += cmd.pdu_len;
189 if (!first)
190 cmdret->pdu_len += cmd.pdu_len;
191 first = 0;
192 } while (cmd.flags & FIO_NET_CMD_F_MORE);
193
194 if (ret) {
195 free(cmdret);
196 cmdret = NULL;
197 } else if (cmdret)
198 cmdret->flags &= ~FIO_NET_CMD_F_MORE;
199
200 return cmdret;
201}
202
203void fio_net_cmd_crc(struct fio_net_cmd *cmd)
204{
205 uint32_t pdu_len;
206
207 cmd->cmd_crc16 = __cpu_to_le16(crc16(cmd, FIO_NET_CMD_CRC_SZ));
208
209 pdu_len = le32_to_cpu(cmd->pdu_len);
210 if (pdu_len)
211 cmd->pdu_crc16 = __cpu_to_le16(crc16(cmd->payload, pdu_len));
212}
213
214int fio_net_send_cmd(int fd, uint16_t opcode, const void *buf, off_t size)
215{
216 struct fio_net_cmd *cmd;
217 size_t this_len;
218 int ret;
219
220 do {
221 this_len = size;
222 if (this_len > FIO_SERVER_MAX_PDU)
223 this_len = FIO_SERVER_MAX_PDU;
224
225 cmd = malloc(sizeof(*cmd) + this_len);
226
227 fio_init_net_cmd(cmd, opcode, buf, this_len);
228
229 if (this_len < size)
230 cmd->flags = __cpu_to_le32(FIO_NET_CMD_F_MORE);
231
232 fio_net_cmd_crc(cmd);
233
234 ret = fio_send_data(fd, cmd, sizeof(*cmd) + this_len);
235 free(cmd);
236 size -= this_len;
237 buf += this_len;
238 } while (!ret && size);
239
240 return ret;
241}
242
243int fio_net_send_simple_cmd(int sk, uint16_t opcode, uint64_t serial)
244{
245 struct fio_net_cmd cmd = {
246 .version = __cpu_to_le16(FIO_SERVER_VER1),
247 .opcode = cpu_to_le16(opcode),
248 .serial = cpu_to_le64(serial),
249 };
250
251 fio_net_cmd_crc(&cmd);
252
253 return fio_send_data(sk, &cmd, sizeof(cmd));
254}
255
256static int send_quit_command(void)
257{
258 dprint(FD_NET, "server: sending quit\n");
259 return fio_net_send_simple_cmd(server_fd, FIO_NET_CMD_QUIT, 0);
260}
261
262static int handle_cur_job(struct fio_net_cmd *cmd)
263{
264 void *buf = cmd->payload;
265 int ret;
266
267 parse_jobs_ini(buf, 1, 0);
268 ret = exec_run();
269 send_quit_command();
270 reset_fio_state();
271 return ret;
272}
273
274static int handle_command(struct fio_net_cmd *cmd)
275{
276 int ret;
277
278 dprint(FD_NET, "server: got opcode %d\n", cmd->opcode);
279
280 switch (cmd->opcode) {
281 case FIO_NET_CMD_QUIT:
282 fio_terminate_threads(TERMINATE_ALL);
283 return 1;
284 case FIO_NET_CMD_EXIT:
285 exit_backend = 1;
286 return 1;
287 case FIO_NET_CMD_ACK:
288 return 0;
289 case FIO_NET_CMD_NAK:
290 return 1;
291 case FIO_NET_CMD_JOB:
292 ret = handle_cur_job(cmd);
293 break;
294 default:
295 log_err("fio: unknown opcode: %d\n", cmd->opcode);
296 ret = 1;
297 }
298
299 return ret;
300}
301
302static int handle_connection(int sk)
303{
304 struct fio_net_cmd *cmd = NULL;
305 int ret = 0;
306
307 /* read forever */
308 while (!exit_backend) {
309 cmd = fio_net_recv_cmd(sk);
310 if (!cmd) {
311 ret = 1;
312 break;
313 }
314
315 ret = handle_command(cmd);
316 if (ret)
317 break;
318
319 free(cmd);
320 cmd = NULL;
321 }
322
323 if (cmd)
324 free(cmd);
325
326 return ret;
327}
328
329void fio_server_idle_loop(void)
330{
331 if (server_fd != -1)
332 handle_connection(server_fd);
333}
334
335static int accept_loop(int listen_sk)
336{
337 struct sockaddr addr;
338 unsigned int len = sizeof(addr);
339 struct pollfd pfd;
340 int ret, sk, flags, exitval = 0;
341
342 flags = fcntl(listen_sk, F_GETFL);
343 flags |= O_NONBLOCK;
344 fcntl(listen_sk, F_SETFL, flags);
345again:
346 pfd.fd = listen_sk;
347 pfd.events = POLLIN;
348 do {
349 ret = poll(&pfd, 1, 100);
350 if (ret < 0) {
351 if (errno == EINTR)
352 break;
353 log_err("fio: poll: %s\n", strerror(errno));
354 goto out;
355 } else if (!ret)
356 continue;
357
358 if (pfd.revents & POLLIN)
359 break;
360 } while (!exit_backend);
361
362 if (exit_backend)
363 goto out;
364
365 sk = accept(listen_sk, &addr, &len);
366 if (sk < 0) {
367 log_err("fio: accept: %s\n", strerror(errno));
368 return -1;
369 }
370
371 dprint(FD_NET, "server got a connection\n");
372
373 server_fd = sk;
374
375 exitval = handle_connection(sk);
376
377 server_fd = -1;
378 close(sk);
379
380 if (!exit_backend)
381 goto again;
382
383out:
384 return exitval;
385}
386
387static int fio_server(void)
388{
389 struct sockaddr_in saddr_in;
390 struct sockaddr addr;
391 unsigned int len;
392 int sk, opt, ret;
393
394 dprint(FD_NET, "starting server\n");
395
396 sk = socket(AF_INET, SOCK_STREAM, 0);
397 if (sk < 0) {
398 log_err("fio: socket: %s\n", strerror(errno));
399 return -1;
400 }
401
402 opt = 1;
403 if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) < 0) {
404 log_err("fio: setsockopt: %s\n", strerror(errno));
405 return -1;
406 }
407#ifdef SO_REUSEPORT
408 if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)) < 0) {
409 log_err("fio: setsockopt: %s\n", strerror(errno));
410 return 1;
411 }
412#endif
413
414 saddr_in.sin_family = AF_INET;
415 saddr_in.sin_addr.s_addr = htonl(INADDR_ANY);
416 saddr_in.sin_port = htons(fio_net_port);
417
418 if (bind(sk, (struct sockaddr *) &saddr_in, sizeof(saddr_in)) < 0) {
419 log_err("fio: bind: %s\n", strerror(errno));
420 return -1;
421 }
422
423 if (listen(sk, 1) < 0) {
424 log_err("fio: listen: %s\n", strerror(errno));
425 return -1;
426 }
427
428 len = sizeof(addr);
429 if (getsockname(sk, &addr, &len) < 0) {
430 log_err("fio: getsockname: %s\n", strerror(errno));
431 return -1;
432 }
433
434 ret = accept_loop(sk);
435 close(sk);
436 return ret;
437}
438
439int fio_server_text_output(const char *buf, unsigned int len)
440{
441 if (server_fd != -1)
442 return fio_net_send_cmd(server_fd, FIO_NET_CMD_TEXT, buf, len);
443
444 return 0;
445}
446
447static void convert_io_stat(struct io_stat *dst, struct io_stat *src)
448{
449 dst->max_val = cpu_to_le64(src->max_val);
450 dst->min_val = cpu_to_le64(src->min_val);
451 dst->samples = cpu_to_le64(src->samples);
452 /* FIXME */
453 dst->mean = __cpu_to_le64(src->mean);
454 dst->S = __cpu_to_le64(src->S);
455}
456
457static void convert_gs(struct group_run_stats *dst, struct group_run_stats *src)
458{
459 int i;
460
461 for (i = 0; i < 2; i++) {
462 dst->max_run[i] = cpu_to_le64(src->max_run[i]);
463 dst->min_run[i] = cpu_to_le64(src->min_run[i]);
464 dst->max_bw[i] = cpu_to_le64(src->max_bw[i]);
465 dst->min_bw[i] = cpu_to_le64(src->min_bw[i]);
466 dst->io_kb[i] = cpu_to_le64(src->io_kb[i]);
467 dst->agg[i] = cpu_to_le64(src->agg[i]);
468 }
469
470 dst->kb_base = cpu_to_le32(src->kb_base);
471 dst->groupid = cpu_to_le32(src->groupid);
472}
473
474/*
475 * Send a CMD_TS, which packs struct thread_stat and group_run_stats
476 * into a single payload.
477 */
478void fio_server_send_ts(struct thread_stat *ts, struct group_run_stats *rs)
479{
480 struct cmd_ts_pdu p;
481 int i, j;
482
483 memset(&p, 0, sizeof(p));
484
485 strcpy(p.ts.name, ts->name);
486 strcpy(p.ts.verror, ts->verror);
487 strcpy(p.ts.description, ts->description);
488
489 p.ts.error = cpu_to_le32(ts->error);
490 p.ts.groupid = cpu_to_le32(ts->groupid);
491 p.ts.pid = cpu_to_le32(ts->pid);
492 p.ts.members = cpu_to_le32(ts->members);
493
494 for (i = 0; i < 2; i++) {
495 convert_io_stat(&p.ts.clat_stat[i], &ts->clat_stat[i]);
496 convert_io_stat(&p.ts.slat_stat[i], &ts->slat_stat[i]);
497 convert_io_stat(&p.ts.lat_stat[i], &ts->lat_stat[i]);
498 convert_io_stat(&p.ts.bw_stat[i], &ts->bw_stat[i]);
499 }
500
501 p.ts.usr_time = cpu_to_le64(ts->usr_time);
502 p.ts.sys_time = cpu_to_le64(ts->sys_time);
503 p.ts.ctx = cpu_to_le64(ts->ctx);
504 p.ts.minf = cpu_to_le64(ts->minf);
505 p.ts.majf = cpu_to_le64(ts->majf);
506 p.ts.clat_percentiles = cpu_to_le64(ts->clat_percentiles);
507 p.ts.percentile_list = NULL;
508
509 for (i = 0; i < FIO_IO_U_MAP_NR; i++) {
510 p.ts.io_u_map[i] = cpu_to_le32(ts->io_u_map[i]);
511 p.ts.io_u_submit[i] = cpu_to_le32(ts->io_u_submit[i]);
512 p.ts.io_u_complete[i] = cpu_to_le32(ts->io_u_complete[i]);
513 }
514
515 for (i = 0; i < FIO_IO_U_LAT_U_NR; i++) {
516 p.ts.io_u_lat_u[i] = cpu_to_le32(ts->io_u_lat_u[i]);
517 p.ts.io_u_lat_m[i] = cpu_to_le32(ts->io_u_lat_m[i]);
518 }
519
520 for (i = 0; i < 2; i++)
521 for (j = 0; j < FIO_IO_U_PLAT_NR; j++)
522 p.ts.io_u_plat[i][j] = cpu_to_le32(ts->io_u_plat[i][j]);
523
524 for (i = 0; i < 3; i++) {
525 p.ts.total_io_u[i] = cpu_to_le64(ts->total_io_u[i]);
526 p.ts.short_io_u[i] = cpu_to_le64(ts->short_io_u[i]);
527 }
528
529 p.ts.total_submit = cpu_to_le64(ts->total_submit);
530 p.ts.total_complete = cpu_to_le64(ts->total_complete);
531
532 for (i = 0; i < 2; i++) {
533 p.ts.io_bytes[i] = cpu_to_le64(ts->io_bytes[i]);
534 p.ts.runtime[i] = cpu_to_le64(ts->runtime[i]);
535 }
536
537 p.ts.total_run_time = cpu_to_le64(ts->total_run_time);
538 p.ts.continue_on_error = cpu_to_le16(ts->continue_on_error);
539 p.ts.total_err_count = cpu_to_le64(ts->total_err_count);
540 p.ts.first_error = cpu_to_le32(ts->first_error);
541 p.ts.kb_base = cpu_to_le32(ts->kb_base);
542
543 convert_gs(&p.rs, rs);
544
545 fio_net_send_cmd(server_fd, FIO_NET_CMD_TS, &p, sizeof(p));
546}
547
548void fio_server_send_gs(struct group_run_stats *rs)
549{
550 struct group_run_stats gs;
551
552 convert_gs(&gs, rs);
553 fio_net_send_cmd(server_fd, FIO_NET_CMD_GS, &gs, sizeof(gs));
554}
555
556void fio_server_send_status(void)
557{
558 struct jobs_eta *je;
559 size_t size;
560 void *buf;
561 int i;
562
563 size = sizeof(*je) + thread_number * sizeof(char);
564 buf = malloc(size);
565 memset(buf, 0, size);
566 je = buf;
567
568 if (!calc_thread_status(je)) {
569 free(je);
570 return;
571 }
572
573 je->nr_running = cpu_to_le32(je->nr_running);
574 je->nr_ramp = cpu_to_le32(je->nr_ramp);
575 je->nr_pending = cpu_to_le32(je->nr_pending);
576 je->files_open = cpu_to_le32(je->files_open);
577 je->m_rate = cpu_to_le32(je->m_rate);
578 je->t_rate = cpu_to_le32(je->t_rate);
579 je->m_iops = cpu_to_le32(je->m_iops);
580 je->t_iops = cpu_to_le32(je->t_iops);
581
582 for (i = 0; i < 2; i++) {
583 je->rate[i] = cpu_to_le32(je->rate[i]);
584 je->iops[i] = cpu_to_le32(je->iops[i]);
585 }
586
587 je->elapsed_sec = cpu_to_le32(je->nr_running);
588 je->eta_sec = cpu_to_le64(je->eta_sec);
589
590 fio_net_send_cmd(server_fd, FIO_NET_CMD_ETA, buf, size);
591 free(je);
592}
593
/*
 * Format a message printf-style and send it to the client as text.
 *
 * Messages that do not fit the 1024 byte buffer are truncated.
 * vsnprintf() returns the length the full message would have had, so the
 * length is clamped to what is actually in the buffer before sending.
 *
 * Returns the result of fio_server_text_output(), or 1 if the message
 * could not be formatted.
 */
int fio_server_log(const char *format, ...)
{
	char buffer[1024];
	va_list args;
	int len;

	va_start(args, format);
	len = vsnprintf(buffer, sizeof(buffer), format, args);
	va_end(args);

	/* formatting error: buffer contents are not usable */
	if (len < 0)
		return 1;

	/* truncated: only sizeof(buffer) - 1 characters were stored */
	if ((size_t) len >= sizeof(buffer))
		len = sizeof(buffer) - 1;

	return fio_server_text_output(buffer, len);
}
606
607int fio_start_server(int daemonize)
608{
609 pid_t pid;
610
611 if (!daemonize)
612 return fio_server();
613
614 openlog("fio", LOG_NDELAY|LOG_NOWAIT|LOG_PID, LOG_USER);
615 pid = fork();
616 if (pid < 0) {
617 syslog(LOG_ERR, "failed server fork");
618 return 1;
619 } else if (pid)
620 exit(0);
621
622 setsid();
623 close(STDIN_FILENO);
624 close(STDOUT_FILENO);
625 close(STDERR_FILENO);
626 f_out = NULL;
627 f_err = NULL;
628 log_syslog = 1;
629 return fio_server();
630}