int groupid = 0;
unsigned int thread_number = 0;
+unsigned int nr_segments = 0;
+unsigned int cur_segment = 0;
unsigned int stat_number = 0;
-int shm_id = 0;
int temp_stall_ts;
unsigned long done_secs = 0;
+#ifdef PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP
+pthread_mutex_t overlap_check = PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP;
+#else
pthread_mutex_t overlap_check = PTHREAD_MUTEX_INITIALIZER;
+#endif
#define JOB_START_TIMEOUT (5 * 1000)
static void sig_int(int sig)
{
- if (threads) {
+ if (nr_segments) {
if (is_backend)
fio_server_got_signal(sig);
else {
}
}
+#ifdef WIN32
+/*
+ * SIGBREAK handler: hand the signal to sig_int() and then hold the handler
+ * open until every job has reached at least the TD_EXITED run state.
+ */
+static void sig_break(int sig)
+{
+	struct thread_data *job;
+	int idx;
+
+	sig_int(sig);
+
+	/*
+	 * Windows terminates all job processes on SIGBREAK as soon as the
+	 * handler returns, so stay here until each job has had time to
+	 * wrap up and give stats.
+	 */
+	for_each_td(job, idx) {
+		for (;;) {
+			if (job->runstate >= TD_EXITED)
+				break;
+			sleep(1);
+		}
+	}
+}
+#endif
+
void sig_show_status(int sig)
{
show_running_run_stats();
/* Windows uses SIGBREAK as a quit signal from other applications */
#ifdef WIN32
memset(&act, 0, sizeof(act));
- act.sa_handler = sig_int;
+ act.sa_handler = sig_break;
act.sa_flags = SA_RESTART;
sigaction(SIGBREAK, &act, NULL);
#endif
static bool __check_min_rate(struct thread_data *td, struct timespec *now,
enum fio_ddir ddir)
{
- unsigned long long bytes = 0;
- unsigned long iops = 0;
- unsigned long spent;
- unsigned long rate;
- unsigned int ratemin = 0;
- unsigned int rate_iops = 0;
- unsigned int rate_iops_min = 0;
+ unsigned long long current_rate_check_bytes = td->this_io_bytes[ddir];
+ unsigned long current_rate_check_blocks = td->this_io_blocks[ddir];
+ unsigned long long option_rate_bytes_min = td->o.ratemin[ddir];
+ unsigned int option_rate_iops_min = td->o.rate_iops_min[ddir];
assert(ddir_rw(ddir));
if (mtime_since(&td->start, now) < 2000)
return false;
- iops += td->this_io_blocks[ddir];
- bytes += td->this_io_bytes[ddir];
- ratemin += td->o.ratemin[ddir];
- rate_iops += td->o.rate_iops[ddir];
- rate_iops_min += td->o.rate_iops_min[ddir];
-
/*
- * if rate blocks is set, sample is running
+ * if last_rate_check_blocks or last_rate_check_bytes is set,
+ * we can compute a rate per ratecycle
*/
- if (td->rate_bytes[ddir] || td->rate_blocks[ddir]) {
- spent = mtime_since(&td->lastrate[ddir], now);
- if (spent < td->o.ratecycle)
+ if (td->last_rate_check_bytes[ddir] || td->last_rate_check_blocks[ddir]) {
+ unsigned long spent = mtime_since(&td->last_rate_check_time[ddir], now);
+ if (spent < td->o.ratecycle || spent==0)
return false;
- if (td->o.rate[ddir] || td->o.ratemin[ddir]) {
+ if (td->o.ratemin[ddir]) {
/*
* check bandwidth specified rate
*/
- if (bytes < td->rate_bytes[ddir]) {
- log_err("%s: rate_min=%uB/s not met, only transferred %lluB\n",
- td->o.name, ratemin, bytes);
+ unsigned long long current_rate_bytes =
+ ((current_rate_check_bytes - td->last_rate_check_bytes[ddir]) * 1000) / spent;
+ if (current_rate_bytes < option_rate_bytes_min) {
+ log_err("%s: rate_min=%lluB/s not met, got %lluB/s\n",
+ td->o.name, option_rate_bytes_min, current_rate_bytes);
return true;
- } else {
- if (spent)
- rate = ((bytes - td->rate_bytes[ddir]) * 1000) / spent;
- else
- rate = 0;
-
- if (rate < ratemin ||
- bytes < td->rate_bytes[ddir]) {
- log_err("%s: rate_min=%uB/s not met, got %luB/s\n",
- td->o.name, ratemin, rate);
- return true;
- }
}
} else {
/*
* checks iops specified rate
*/
- if (iops < rate_iops) {
- log_err("%s: rate_iops_min=%u not met, only performed %lu IOs\n",
- td->o.name, rate_iops, iops);
+ unsigned long long current_rate_iops =
+ ((current_rate_check_blocks - td->last_rate_check_blocks[ddir]) * 1000) / spent;
+
+ if (current_rate_iops < option_rate_iops_min) {
+ log_err("%s: rate_iops_min=%u not met, got %llu IOPS\n",
+ td->o.name, option_rate_iops_min, current_rate_iops);
return true;
- } else {
- if (spent)
- rate = ((iops - td->rate_blocks[ddir]) * 1000) / spent;
- else
- rate = 0;
-
- if (rate < rate_iops_min ||
- iops < td->rate_blocks[ddir]) {
- log_err("%s: rate_iops_min=%u not met, got %lu IOPS\n",
- td->o.name, rate_iops_min, rate);
- return true;
- }
}
}
}
- td->rate_bytes[ddir] = bytes;
- td->rate_blocks[ddir] = iops;
- memcpy(&td->lastrate[ddir], now, sizeof(*now));
+ td->last_rate_check_bytes[ddir] = current_rate_check_bytes;
+ td->last_rate_check_blocks[ddir] = current_rate_check_blocks;
+ memcpy(&td->last_rate_check_time[ddir], now, sizeof(*now));
return false;
}
{
bool ret = false;
- if (td->bytes_done[DDIR_READ])
- ret |= __check_min_rate(td, now, DDIR_READ);
- if (td->bytes_done[DDIR_WRITE])
- ret |= __check_min_rate(td, now, DDIR_WRITE);
- if (td->bytes_done[DDIR_TRIM])
- ret |= __check_min_rate(td, now, DDIR_TRIM);
+ for_each_rw_ddir(ddir) {
+ if (td->bytes_done[ddir])
+ ret |= __check_min_rate(td, now, ddir);
+ }
return ret;
}
td_clear_error(td);
*retptr = 0;
return false;
- } else if (td->o.fill_device && err == ENOSPC) {
+ } else if (td->o.fill_device && (err == ENOSPC || err == EDQUOT)) {
/*
* We expect to hit this error if
* fill_device option is set.
if ((full && !min_evts) || !td->o.iodepth_batch_complete_min)
min_evts = 1;
- if (time && __should_check_rate(td))
+ if (time && should_check_rate(td))
fio_gettime(time, NULL);
do {
requeue_io_u(td, &io_u);
} else {
sync_done:
- if (comp_time && __should_check_rate(td))
+ if (comp_time && should_check_rate(td))
fio_gettime(comp_time, NULL);
*ret = io_u_sync_complete(td, io_u);
break;
}
} else {
- if (ddir_rw_sum(td->bytes_done) + td->o.rw_min_bs > verify_bytes)
+ if (td->bytes_verified + td->o.rw_min_bs > verify_bytes)
break;
while ((io_u = get_io_u(td)) != NULL) {
break;
} else if (io_u->ddir == DDIR_WRITE) {
io_u->ddir = DDIR_READ;
+ io_u->numberio = td->verify_read_issues;
+ td->verify_read_issues++;
populate_verify_io_u(td, io_u);
break;
} else {
if (td->o.rate_process == RATE_PROCESS_POISSON) {
uint64_t val, iops;
- iops = bps / td->o.bs[ddir];
+ iops = bps / td->o.min_bs[ddir];
val = (int64_t) (1000000 / iops) *
-logf(__rand_0_1(&td->poisson_state[ddir]));
if (val) {
return 0;
}
-static void handle_thinktime(struct thread_data *td, enum fio_ddir ddir)
+/*
+ * Reset the thinktime bookkeeping of a job: the last thinktime timestamp is
+ * set to the job epoch, the last thinktime block count to zero, and the
+ * block counter is pointed at td->io_blocks when thinktime_blocks_type is
+ * THINKTIME_BLOCKS_TYPE_COMPLETE, or at td->io_issues otherwise.
+ */
+static void init_thinktime(struct thread_data *td)
+{
+	td->last_thinktime_blocks = 0;
+	td->last_thinktime = td->epoch;
+
+	if (td->o.thinktime_blocks_type != THINKTIME_BLOCKS_TYPE_COMPLETE)
+		td->thinktime_blocks_counter = td->io_issues;
+	else
+		td->thinktime_blocks_counter = td->io_blocks;
+}
+
+static void handle_thinktime(struct thread_data *td, enum fio_ddir ddir,
+ struct timespec *time)
{
unsigned long long b;
uint64_t total;
int left;
+ struct timespec now;
+ bool stall = false;
+
+ if (td->o.thinktime_iotime) {
+ fio_gettime(&now, NULL);
+ if (utime_since(&td->last_thinktime, &now)
+ >= td->o.thinktime_iotime + td->o.thinktime) {
+ stall = true;
+ } else if (!fio_option_is_set(&td->o, thinktime_blocks)) {
+ /*
+ * When thinktime_iotime is set and thinktime_blocks is
+ * not set, skip the thinktime_blocks check, since
+ * thinktime_blocks default value 1 does not work
+ * together with thinktime_iotime.
+ */
+ return;
+ }
+
+ }
+
+ b = ddir_rw_sum(td->thinktime_blocks_counter);
+ if (b >= td->last_thinktime_blocks + td->o.thinktime_blocks)
+ stall = true;
- b = ddir_rw_sum(td->io_blocks);
- if (b % td->o.thinktime_blocks)
+ if (!stall)
return;
io_u_quiesce(td);
/* adjust for rate_process=poisson */
td->last_usec[ddir] += total;
}
+
+ if (time && should_check_rate(td))
+ fio_gettime(time, NULL);
+
+ td->last_thinktime_blocks = b;
+ if (td->o.thinktime_iotime)
+ td->last_thinktime = now;
}
/*
total_bytes += td->o.size;
/* In trimwrite mode, each byte is trimmed and then written, so
- * allow total_bytes to be twice as big */
- if (td_trimwrite(td))
+ * allow total_bytes or number of ios to be twice as big */
+ if (td_trimwrite(td)) {
total_bytes += td->total_io_size;
+ td->o.number_ios *= 2;
+ }
while ((td->o.read_iolog_file && !flist_empty(&td->io_log_list)) ||
(!flist_empty(&td->trim_list)) || !io_issue_bytes_exceeded(td) ||
break;
}
- if (io_u->ddir == DDIR_WRITE && td->flags & TD_F_DO_VERIFY)
+ if (io_u->ddir == DDIR_WRITE && td->flags & TD_F_DO_VERIFY) {
+ io_u->numberio = td->io_issues[io_u->ddir];
populate_verify_io_u(td, io_u);
+ }
ddir = io_u->ddir;
if (td->o.verify != VERIFY_NONE && io_u->ddir == DDIR_READ &&
((io_u->flags & IO_U_F_VER_LIST) || !td_rw(td))) {
- if (!td->o.verify_pattern_bytes) {
- io_u->rand_seed = __rand(&td->verify_state);
- if (sizeof(int) != sizeof(long *))
- io_u->rand_seed *= __rand(&td->verify_state);
- }
-
if (verify_state_should_stop(td, io_u)) {
put_io_u(td, io_u);
break;
td->rate_io_issue_bytes[__ddir] += blen;
}
- if (should_check_rate(td))
+ if (should_check_rate(td)) {
td->rate_next_io_time[__ddir] = usec_for_io(td, __ddir);
+ fio_gettime(&comp_time, NULL);
+ }
} else {
ret = io_u_submit(td, io_u);
}
if (ret < 0)
break;
+
+ if (ddir_rw(ddir) && td->o.thinktime)
+ handle_thinktime(td, ddir, &comp_time);
+
if (!ddir_rw_sum(td->bytes_done) &&
!td_ioengine_flagged(td, FIO_NOIO))
continue;
}
if (!in_ramp_time(td) && td->o.latency_target)
lat_target_check(td);
-
- if (ddir_rw(ddir) && td->o.thinktime)
- handle_thinktime(td, ddir);
}
check_update_rusage(td);
if (td->trim_entries)
log_err("fio: %lu trim entries leaked?\n", td->trim_entries);
- if (td->o.fill_device && td->error == ENOSPC) {
+ if (td->o.fill_device && (td->error == ENOSPC || td->error == EDQUOT)) {
td->error = 0;
fio_mark_td_terminate(td);
}
if (i) {
ret = io_u_queued_complete(td, i);
- if (td->o.fill_device && td->error == ENOSPC)
+ if (td->o.fill_device &&
+ (td->error == ENOSPC || td->error == EDQUOT))
td->error = 0;
}
f->file_name);
}
}
- } else
+ } else {
+ if (td->o.io_submit_mode == IO_MODE_OFFLOAD)
+ workqueue_flush(&td->io_wq);
cleanup_pending_aio(td);
+ }
/*
* stop job if we failed doing any IO
}
}
- init_io_u_buffers(td);
+ if (init_io_u_buffers(td))
+ return 1;
if (init_file_completion_logging(td, max_units))
return 1;
* overflow later. this adjustment may be too much if we get
* lucky and the allocator gives us an aligned address.
*/
- if (td->o.odirect || td->o.mem_align || td->o.oatomic ||
+ if (td->o.odirect || td->o.mem_align ||
td_ioengine_flagged(td, FIO_RAWIO))
td->orig_buffer_size += page_mask + td->o.mem_align;
if (data_xfer && allocate_io_mem(td))
return 1;
- if (td->o.odirect || td->o.mem_align || td->o.oatomic ||
+ if (td->o.odirect || td->o.mem_align ||
td_ioengine_flagged(td, FIO_RAWIO))
p = PTR_ALIGN(td->orig_buffer, page_mask) + td->o.mem_align;
else
return 0;
}
+#ifdef FIO_HAVE_IOSCHED_SWITCH
/*
- * This function is Linux specific.
+ * These functions are Linux specific.
* FIO_HAVE_IOSCHED_SWITCH enabled currently means it's Linux.
*/
-static int switch_ioscheduler(struct thread_data *td)
+static int set_ioscheduler(struct thread_data *td, struct fio_file *file)
{
-#ifdef FIO_HAVE_IOSCHED_SWITCH
char tmp[256], tmp2[128], *p;
FILE *f;
int ret;
- if (td_ioengine_flagged(td, FIO_DISKLESSIO))
- return 0;
-
- assert(td->files && td->files[0]);
- sprintf(tmp, "%s/queue/scheduler", td->files[0]->du->sysfs_root);
+ assert(file->du && file->du->sysfs_root);
+ sprintf(tmp, "%s/queue/scheduler", file->du->sysfs_root);
f = fopen(tmp, "r+");
if (!f) {
sprintf(tmp2, "[%s]", td->o.ioscheduler);
if (!strstr(tmp, tmp2)) {
- log_err("fio: io scheduler %s not found\n", td->o.ioscheduler);
+ log_err("fio: unable to set io scheduler to %s\n", td->o.ioscheduler);
td_verror(td, EINVAL, "iosched_switch");
fclose(f);
return 1;
fclose(f);
return 0;
+}
+
+/*
+ * Call set_ioscheduler() for every eligible file of the job.
+ *
+ * Returns 0 when the engine is flagged FIO_DISKLESSIO or when every call
+ * succeeded; otherwise returns the first non-zero set_ioscheduler() result
+ * and stops iterating.
+ */
+static int switch_ioscheduler(struct thread_data *td)
+{
+	struct fio_file *file;
+	unsigned int idx;
+
+	if (td_ioengine_flagged(td, FIO_DISKLESSIO))
+		return 0;
+
+	assert(td->files && td->files[0]);
+
+	for_each_file(td, file, idx) {
+		int err;
+
+		/* Only consider regular files and block device files */
+		if (file->filetype != FIO_TYPE_FILE &&
+		    file->filetype != FIO_TYPE_BLOCK)
+			continue;
+
+		/*
+		 * Skip files for which the hosting device could not be
+		 * determined.
+		 */
+		if (!file->du)
+			continue;
+
+		err = set_ioscheduler(td, file);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
#else
+
+static int switch_ioscheduler(struct thread_data *td)
+{
return 0;
-#endif
}
+#endif /* FIO_HAVE_IOSCHED_SWITCH */
+
static bool keep_running(struct thread_data *td)
{
unsigned long long limit;
return false;
}
-static int exec_string(struct thread_options *o, const char *string, const char *mode)
+static int exec_string(struct thread_options *o, const char *string,
+ const char *mode)
{
- size_t newlen = strlen(string) + strlen(o->name) + strlen(mode) + 13 + 1;
int ret;
char *str;
- str = malloc(newlen);
- sprintf(str, "%s > %s.%s.txt 2>&1", string, o->name, mode);
+ if (asprintf(&str, "%s > %s.%s.txt 2>&1", string, o->name, mode) < 0)
+ return -1;
- log_info("%s : Saving output of %s in %s.%s.txt\n",o->name, mode, o->name, mode);
+ log_info("%s : Saving output of %s in %s.%s.txt\n", o->name, mode,
+ o->name, mode);
ret = system(str);
if (ret == -1)
log_err("fio: exec of cmd <%s> failed\n", str);
uint64_t bytes_done[DDIR_RWDIR_CNT];
int deadlock_loop_cnt;
bool clear_state;
- int ret;
+ int res, ret;
sk_out_assign(sk_out);
free(fd);
if (!init_iolog(td))
goto err;
+ /* ioprio_set() has to be done before td_io_init() */
+ if (fio_option_is_set(o, ioprio) ||
+ fio_option_is_set(o, ioprio_class)) {
+ ret = ioprio_set(IOPRIO_WHO_PROCESS, 0, o->ioprio_class, o->ioprio);
+ if (ret == -1) {
+ td_verror(td, errno, "ioprio_set");
+ goto err;
+ }
+ td->ioprio = ioprio_value(o->ioprio_class, o->ioprio);
+ td->ts.ioprio = td->ioprio;
+ }
+
if (td_io_init(td))
goto err;
+ if (td_ioengine_flagged(td, FIO_SYNCIO) && td->o.iodepth > 1 && td->o.io_submit_mode != IO_MODE_OFFLOAD) {
+ log_info("note: both iodepth >= 1 and synchronous I/O engine "
+ "are selected, queue depth will be capped at 1\n");
+ }
+
if (init_io_u(td))
goto err;
if (o->verify_async && verify_async_init(td))
goto err;
- if (fio_option_is_set(o, ioprio) ||
- fio_option_is_set(o, ioprio_class)) {
- ret = ioprio_set(IOPRIO_WHO_PROCESS, 0, o->ioprio_class, o->ioprio);
- if (ret == -1) {
- td_verror(td, errno, "ioprio_set");
- goto err;
- }
- }
-
if (o->cgroup && cgroup_setup(td, cgroup_list, &cgroup_mnt))
goto err;
if (!init_random_map(td))
goto err;
- if (o->exec_prerun && exec_string(o, o->exec_prerun, (const char *)"prerun"))
+ if (o->exec_prerun && exec_string(o, o->exec_prerun, "prerun"))
goto err;
if (o->pre_read && !pre_read_files(td))
if (rate_submit_init(td, sk_out))
goto err;
- set_epoch_time(td, o->log_unix_epoch);
+ set_epoch_time(td, o->log_unix_epoch | o->log_alternate_epoch, o->log_alternate_epoch_clock_id);
fio_getrusage(&td->ru_start);
memcpy(&td->bw_sample_time, &td->epoch, sizeof(td->epoch));
memcpy(&td->iops_sample_time, &td->epoch, sizeof(td->epoch));
memcpy(&td->ss.prev_time, &td->epoch, sizeof(td->epoch));
+ init_thinktime(td);
+
if (o->ratemin[DDIR_READ] || o->ratemin[DDIR_WRITE] ||
o->ratemin[DDIR_TRIM]) {
- memcpy(&td->lastrate[DDIR_READ], &td->bw_sample_time,
+ memcpy(&td->last_rate_check_time[DDIR_READ], &td->bw_sample_time,
sizeof(td->bw_sample_time));
- memcpy(&td->lastrate[DDIR_WRITE], &td->bw_sample_time,
+ memcpy(&td->last_rate_check_time[DDIR_WRITE], &td->bw_sample_time,
sizeof(td->bw_sample_time));
- memcpy(&td->lastrate[DDIR_TRIM], &td->bw_sample_time,
+ memcpy(&td->last_rate_check_time[DDIR_TRIM], &td->bw_sample_time,
sizeof(td->bw_sample_time));
}
}
} while (1);
- if (td_read(td) && td->io_bytes[DDIR_READ])
+ if (td->io_bytes[DDIR_READ] && (td_read(td) ||
+ ((td->flags & TD_F_VER_BACKLOG) && td_write(td))))
update_runtime(td, elapsed_us, DDIR_READ);
if (td_write(td) && td->io_bytes[DDIR_WRITE])
update_runtime(td, elapsed_us, DDIR_WRITE);
* offload mode so that we don't clean up this job while
* another thread is checking its io_u's for overlap
*/
- if (td_offload_overlap(td))
- pthread_mutex_lock(&overlap_check);
+ if (td_offload_overlap(td)) {
+ int res = pthread_mutex_lock(&overlap_check);
+ assert(res == 0);
+ }
td_set_runstate(td, TD_FINISHING);
- if (td_offload_overlap(td))
- pthread_mutex_unlock(&overlap_check);
+ if (td_offload_overlap(td)) {
+ res = pthread_mutex_unlock(&overlap_check);
+ assert(res == 0);
+ }
update_rusage_stat(td);
td->ts.total_run_time = mtime_since_now(&td->epoch);
- td->ts.io_bytes[DDIR_READ] = td->io_bytes[DDIR_READ];
- td->ts.io_bytes[DDIR_WRITE] = td->io_bytes[DDIR_WRITE];
- td->ts.io_bytes[DDIR_TRIM] = td->io_bytes[DDIR_TRIM];
+ for_each_rw_ddir(ddir) {
+ td->ts.io_bytes[ddir] = td->io_bytes[ddir];
+ }
if (td->o.verify_state_save && !(td->flags & TD_F_VSTATE_SAVED) &&
(td->o.verify != VERIFY_NONE && td_write(td)))
rate_submit_exit(td);
if (o->exec_postrun)
- exec_string(o, o->exec_postrun, (const char *)"postrun");
+ exec_string(o, o->exec_postrun, "postrun");
if (exitall_on_terminate || (o->exitall_error && td->error))
fio_terminate_threads(td->groupid, td->o.exit_what);
for_each_td(td, i) {
int flags = 0;
- if (!strcmp(td->o.ioengine, "cpuio"))
+ if (!strcmp(td->o.ioengine, "cpuio"))
cputhreads++;
else
realthreads++;
done_secs += mtime_since_now(&td->epoch) / 1000;
profile_td_exit(td);
+ flow_exit_job(td);
}
if (*nr_running == cputhreads && !pending && realthreads)
strerror(ret));
} else {
pid_t pid;
+ void *eo;
dprint(FD_PROCESS, "will fork\n");
+ eo = td->eo;
+ read_barrier();
pid = fork();
if (!pid) {
int ret;
_exit(ret);
} else if (i == fio_debug_jobno)
*fio_debug_jobp = pid;
+ free(eo);
+ free(fd);
+ fd = NULL;
}
dprint(FD_MUTEX, "wait on startup_sem\n");
if (fio_sem_down_timeout(startup_sem, 10000)) {
setup_log(&agg_io_log[DDIR_TRIM], &p, "agg-trim_bw.log");
}
+ if (init_global_dedupe_working_set_seeds()) {
+ log_err("fio: failed to initialize global dedupe working set\n");
+ return 1;
+ }
+
startup_sem = fio_sem_init(FIO_SEM_LOCKED);
if (!sk_out)
is_local_backend = true;
}
for_each_td(td, i) {
+ struct thread_stat *ts = &td->ts;
+
+ free_clat_prio_stats(ts);
steadystate_free(td);
fio_options_free(td);
+ fio_dump_options_free(td);
if (td->rusage_sem) {
fio_sem_remove(td->rusage_sem);
td->rusage_sem = NULL;