client: check return of dup(2)
[fio.git] / client.c
index 779fb9d7f8d04f53bd56388d13f869d0760980e5..611ee2f5a3f929d9d7ae7c7d14c229196bdfedbb 100644 (file)
--- a/client.c
+++ b/client.c
@@ -1,13 +1,11 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
-#include <limits.h>
 #include <errno.h>
 #include <fcntl.h>
-#include <sys/poll.h>
+#include <poll.h>
 #include <sys/types.h>
 #include <sys/stat.h>
-#include <sys/wait.h>
 #include <sys/socket.h>
 #include <sys/un.h>
 #include <netinet/in.h>
 #include "server.h"
 #include "flist.h"
 #include "hash.h"
-#include "verify.h"
+#include "verify-state.h"
 
 static void handle_du(struct fio_client *client, struct fio_net_cmd *cmd);
 static void handle_ts(struct fio_client *client, struct fio_net_cmd *cmd);
 static void handle_gs(struct fio_client *client, struct fio_net_cmd *cmd);
 static void handle_probe(struct fio_client *client, struct fio_net_cmd *cmd);
 static void handle_text(struct fio_client *client, struct fio_net_cmd *cmd);
-static void handle_stop(struct fio_client *client, struct fio_net_cmd *cmd);
+static void handle_stop(struct fio_client *client);
 static void handle_start(struct fio_client *client, struct fio_net_cmd *cmd);
 
 static void convert_text(struct fio_net_cmd *cmd);
@@ -120,6 +118,54 @@ static int read_data(int fd, void *data, size_t size)
        return 0;
 }
 
+/*
+ * Read a job file from 'fd' line by line into 'data', which has room for
+ * 'size' bytes. Each line is passed through fio_option_dup_subs() before it
+ * is stored (presumably variable substitution, so the stored text may be
+ * longer than the file contents - confirm against that helper).
+ *
+ * On success 'data' holds the concatenated lines as one NUL terminated
+ * string. Returns 0 on success or a positive errno value on failure; ENOSPC
+ * means 'data' was too small for the processed text.
+ */
+static int read_ini_data(int fd, void *data, size_t size)
+{
+       char *p = data;
+       int ret = 0;
+       FILE *fp;
+       int dupfd;
+
+       /*
+        * Work on a duplicate, so that the fclose() below does not close
+        * the descriptor owned by the caller.
+        */
+       dupfd = dup(fd);
+       if (dupfd < 0)
+               return errno;
+
+       fp = fdopen(dupfd, "r");
+       if (!fp) {
+               /* Save errno first, close() may overwrite it */
+               ret = errno;
+               close(dupfd);
+               return ret;
+       }
+
+       /* An empty file must still yield a terminated (empty) string */
+       if (size)
+               *p = '\0';
+
+       while (1) {
+               size_t len;
+               char buf[OPT_LEN_MAX+1], *sub;
+
+               if (!fgets(buf, sizeof(buf), fp)) {
+                       if (ferror(fp)) {
+                               if (errno == EAGAIN || errno == EINTR) {
+                                       /*
+                                        * The stream error indicator is
+                                        * sticky, reset it before retrying.
+                                        */
+                                       clearerr(fp);
+                                       continue;
+                               }
+                               ret = errno;
+                       }
+                       break;
+               }
+
+               /* 'sub' is heap allocated and must be freed by us */
+               sub = fio_option_dup_subs(buf);
+               if (!sub) {
+                       ret = ENOMEM;
+                       break;
+               }
+
+               /* Need room for the line itself plus the terminating NUL */
+               len = strlen(sub);
+               if (len + 1 > size) {
+                       log_err("fio: no space left to read data\n");
+                       free(sub);
+                       ret = ENOSPC;
+                       break;
+               }
+
+               memcpy(p, sub, len);
+               free(sub);
+               p += len;
+               *p = '\0';
+               /*
+                * The terminator is not accounted for here, the next line
+                * (if any) overwrites it.
+                */
+               size -= len;
+       }
+
+       fclose(fp);
+       return ret;
+}
+
 static void fio_client_json_init(void)
 {
        char time_buf[32];
@@ -765,13 +811,17 @@ static int __fio_client_send_local_ini(struct fio_client *client,
                return ret;
        }
 
+       /*
+        * Add extra space for variable expansion; it is not guaranteed to suffice.
+        */
+       sb.st_size += OPT_LEN_MAX;
        p_size = sb.st_size + sizeof(*pdu);
        pdu = malloc(p_size);
        buf = pdu->buf;
 
        len = sb.st_size;
        p = buf;
-       if (read_data(fd, p, len)) {
+       if (read_ini_data(fd, p, len)) {
                log_err("fio: failed reading job file %s\n", filename);
                close(fd);
                free(pdu);
@@ -905,21 +955,21 @@ static void convert_ts(struct thread_stat *dst, struct thread_stat *src)
        }
 
        for (i = 0; i < FIO_IO_U_MAP_NR; i++) {
-               dst->io_u_map[i]        = le32_to_cpu(src->io_u_map[i]);
-               dst->io_u_submit[i]     = le32_to_cpu(src->io_u_submit[i]);
-               dst->io_u_complete[i]   = le32_to_cpu(src->io_u_complete[i]);
+               dst->io_u_map[i]        = le64_to_cpu(src->io_u_map[i]);
+               dst->io_u_submit[i]     = le64_to_cpu(src->io_u_submit[i]);
+               dst->io_u_complete[i]   = le64_to_cpu(src->io_u_complete[i]);
        }
 
        for (i = 0; i < FIO_IO_U_LAT_N_NR; i++)
-               dst->io_u_lat_n[i]      = le32_to_cpu(src->io_u_lat_n[i]);
+               dst->io_u_lat_n[i]      = le64_to_cpu(src->io_u_lat_n[i]);
        for (i = 0; i < FIO_IO_U_LAT_U_NR; i++)
-               dst->io_u_lat_u[i]      = le32_to_cpu(src->io_u_lat_u[i]);
+               dst->io_u_lat_u[i]      = le64_to_cpu(src->io_u_lat_u[i]);
        for (i = 0; i < FIO_IO_U_LAT_M_NR; i++)
-               dst->io_u_lat_m[i]      = le32_to_cpu(src->io_u_lat_m[i]);
+               dst->io_u_lat_m[i]      = le64_to_cpu(src->io_u_lat_m[i]);
 
        for (i = 0; i < DDIR_RWDIR_CNT; i++)
                for (j = 0; j < FIO_IO_U_PLAT_NR; j++)
-                       dst->io_u_plat[i][j] = le32_to_cpu(src->io_u_plat[i][j]);
+                       dst->io_u_plat[i][j] = le64_to_cpu(src->io_u_plat[i][j]);
 
        for (i = 0; i < DDIR_RWDIR_CNT; i++) {
                dst->total_io_u[i]      = le64_to_cpu(src->total_io_u[i]);
@@ -942,6 +992,8 @@ static void convert_ts(struct thread_stat *dst, struct thread_stat *src)
        dst->kb_base            = le32_to_cpu(src->kb_base);
        dst->unit_base          = le32_to_cpu(src->unit_base);
 
+       dst->sig_figs           = le32_to_cpu(src->sig_figs);
+
        dst->latency_depth      = le32_to_cpu(src->latency_depth);
        dst->latency_target     = le64_to_cpu(src->latency_target);
        dst->latency_window     = le64_to_cpu(src->latency_window);
@@ -959,7 +1011,7 @@ static void convert_ts(struct thread_stat *dst, struct thread_stat *src)
        dst->ss_deviation.u.f   = fio_uint64_to_double(le64_to_cpu(src->ss_deviation.u.i));
        dst->ss_criterion.u.f   = fio_uint64_to_double(le64_to_cpu(src->ss_criterion.u.i));
 
-       if (dst->ss_state & __FIO_SS_DATA) {
+       if (dst->ss_state & FIO_SS_DATA) {
                for (i = 0; i < dst->ss_dur; i++ ) {
                        dst->ss_iops_data[i] = le64_to_cpu(src->ss_iops_data[i]);
                        dst->ss_bw_data[i] = le64_to_cpu(src->ss_bw_data[i]);
@@ -982,6 +1034,7 @@ static void convert_gs(struct group_run_stats *dst, struct group_run_stats *src)
 
        dst->kb_base    = le32_to_cpu(src->kb_base);
        dst->unit_base  = le32_to_cpu(src->unit_base);
+       dst->sig_figs   = le32_to_cpu(src->sig_figs);
        dst->groupid    = le32_to_cpu(src->groupid);
        dst->unified_rw_rep     = le32_to_cpu(src->unified_rw_rep);
 }
@@ -1021,6 +1074,7 @@ static void handle_ts(struct fio_client *client, struct fio_net_cmd *cmd)
        client_ts.thread_number = p->ts.thread_number;
        client_ts.groupid = p->ts.groupid;
        client_ts.unified_rw_rep = p->ts.unified_rw_rep;
+       client_ts.sig_figs = p->ts.sig_figs;
 
        if (++sum_stat_nr == sum_stat_clients) {
                strcpy(client_ts.name, "All clients");
@@ -1167,6 +1221,7 @@ static void convert_jobs_eta(struct jobs_eta *je)
        je->nr_threads          = le32_to_cpu(je->nr_threads);
        je->is_pow2             = le32_to_cpu(je->is_pow2);
        je->unit_base           = le32_to_cpu(je->unit_base);
+       je->sig_figs            = le32_to_cpu(je->sig_figs);
 }
 
 void fio_client_sum_jobs_eta(struct jobs_eta *dst, struct jobs_eta *je)
@@ -1278,7 +1333,7 @@ static void client_flush_hist_samples(FILE *f, int hist_coarseness, void *sample
        int log_offset;
        uint64_t i, j, nr_samples;
        struct io_u_plat_entry *entry;
-       unsigned int *io_u_plat;
+       uint64_t *io_u_plat;
 
        int stride = 1 << hist_coarseness;
 
@@ -1301,9 +1356,9 @@ static void client_flush_hist_samples(FILE *f, int hist_coarseness, void *sample
                fprintf(f, "%lu, %u, %u, ", (unsigned long) s->time,
                                                io_sample_ddir(s), s->bs);
                for (j = 0; j < FIO_IO_U_PLAT_NR - stride; j += stride) {
-                       fprintf(f, "%lu, ", hist_sum(j, stride, io_u_plat, NULL));
+                       fprintf(f, "%llu, ", (unsigned long long)hist_sum(j, stride, io_u_plat, NULL));
                }
-               fprintf(f, "%lu\n", (unsigned long)
+               fprintf(f, "%llu\n", (unsigned long long)
                        hist_sum(FIO_IO_U_PLAT_NR - stride, stride, io_u_plat, NULL));
 
        }
@@ -1336,7 +1391,7 @@ static int fio_client_handle_iolog(struct fio_client *client,
        sprintf(log_pathname, "%s.%s", pdu->name, client->hostname);
 
        if (store_direct) {
-               ssize_t ret;
+               ssize_t wrote;
                size_t sz;
                int fd;
 
@@ -1350,10 +1405,10 @@ static int fio_client_handle_iolog(struct fio_client *client,
                }
 
                sz = cmd->pdu_len - sizeof(*pdu);
-               ret = write(fd, pdu->samples, sz);
+               wrote = write(fd, pdu->samples, sz);
                close(fd);
 
-               if (ret != sz) {
+               if (wrote != sz) {
                        log_err("fio: short write on compressed log\n");
                        ret = 1;
                        goto out;
@@ -1438,7 +1493,7 @@ static void handle_start(struct fio_client *client, struct fio_net_cmd *cmd)
        sum_stat_clients += client->nr_stat;
 }
 
-static void handle_stop(struct fio_client *client, struct fio_net_cmd *cmd)
+static void handle_stop(struct fio_client *client)
 {
        if (client->error)
                log_info("client <%s>: exited with error %d\n", client->hostname, client->error);
@@ -1662,6 +1717,8 @@ int fio_handle_client(struct fio_client *client)
        dprint(FD_NET, "client: got cmd op %s from %s (pdu=%u)\n",
                fio_server_op(cmd->opcode), client->hostname, cmd->pdu_len);
 
+       client->last_cmd = cmd->opcode;
+
        switch (cmd->opcode) {
        case FIO_NET_CMD_QUIT:
                if (ops->quit)
@@ -1685,7 +1742,7 @@ int fio_handle_client(struct fio_client *client)
                struct cmd_ts_pdu *p = (struct cmd_ts_pdu *) cmd->payload;
 
                dprint(FD_NET, "client: ts->ss_state = %u\n", (unsigned int) le32_to_cpu(p->ts.ss_state));
-               if (le32_to_cpu(p->ts.ss_state) & __FIO_SS_DATA) {
+               if (le32_to_cpu(p->ts.ss_state) & FIO_SS_DATA) {
                        dprint(FD_NET, "client: received steadystate ring buffers\n");
 
                        size = le64_to_cpu(p->ts.ss_dur);
@@ -1739,7 +1796,7 @@ int fio_handle_client(struct fio_client *client)
                client->state = Client_stopped;
                client->error = le32_to_cpu(pdu->error);
                client->signal = le32_to_cpu(pdu->signal);
-               ops->stop(client, cmd);
+               ops->stop(client);
                break;
                }
        case FIO_NET_CMD_ADD_JOB: {
@@ -1828,6 +1885,9 @@ static void request_client_etas(struct client_ops *ops)
        struct client_eta *eta;
        int skipped = 0;
 
+       if (eta_print == FIO_ETA_NEVER)
+               return;
+
        dprint(FD_NET, "client: request eta (%d)\n", nr_clients);
 
        eta = calloc(1, sizeof(*eta) + __THREAD_RUNSTR_SZ(REAL_MAX_JOBS));
@@ -1897,16 +1957,19 @@ static int client_check_cmd_timeout(struct fio_client *client,
        int ret = 0;
 
        flist_for_each_safe(entry, tmp, &client->cmd_list) {
+               unsigned int op;
+
                reply = flist_entry(entry, struct fio_net_cmd_reply, list);
 
                if (mtime_since(&reply->ts, now) < FIO_NET_CLIENT_TIMEOUT)
                        continue;
 
+               op = reply->opcode;
                if (!handle_cmd_timeout(client, reply))
                        continue;
 
                log_err("fio: client %s, timeout on cmd %s\n", client->hostname,
-                                               fio_server_op(reply->opcode));
+                                               fio_server_op(op));
                ret = 1;
        }
 
@@ -1936,7 +1999,10 @@ static int fio_check_clients_timed_out(void)
                else
                        log_err("fio: client %s timed out\n", client->hostname);
 
-               client->error = ETIMEDOUT;
+               if (client->last_cmd != FIO_NET_CMD_VTRIGGER)
+                       client->error = ETIMEDOUT;
+               else
+                       log_info("fio: ignoring timeout due to vtrigger\n");
                remove_client(client);
                ret = 1;
        }
@@ -1985,7 +2051,7 @@ int fio_handle_clients(struct client_ops *ops)
                        int timeout;
 
                        fio_gettime(&ts, NULL);
-                       if (mtime_since(&eta_ts, &ts) >= 900) {
+                       if (eta_time_within_slack(mtime_since(&eta_ts, &ts))) {
                                request_client_etas(ops);
                                memcpy(&eta_ts, &ts, sizeof(ts));