-
- if (td->o.odirect || td->o.mem_align ||
- (td->io_ops->flags & FIO_RAWIO))
- p = PAGE_ALIGN(td->orig_buffer) + td->o.mem_align;
- else
- p = td->orig_buffer;
-
- cl_align = os_cache_line_size();
-
- for (i = 0; i < max_units; i++) {
- void *ptr;
-
- if (td->terminate)
- return 1;
-
- ptr = fio_memalign(cl_align, sizeof(*io_u));
- if (!ptr) {
- log_err("fio: unable to allocate aligned memory\n");
- break;
- }
-
- io_u = ptr;
- memset(io_u, 0, sizeof(*io_u));
- INIT_FLIST_HEAD(&io_u->list);
- dprint(FD_MEM, "io_u alloc %p, index %u\n", io_u, i);
-
- if (!(td->io_ops->flags & FIO_NOIO)) {
- io_u->buf = p + max_bs * i;
- dprint(FD_MEM, "io_u %p, mem %p\n", io_u, io_u->buf);
-
- if (td_write(td))
- io_u_fill_buffer(td, io_u, max_bs);
- if (td_write(td) && td->o.verify_pattern_bytes) {
- /*
- * Fill the buffer with the pattern if we are
- * going to be doing writes.
- */
- fill_pattern(td, io_u->buf, max_bs, io_u, 0, 0);
- }
- }
-
- io_u->index = i;
- io_u->flags = IO_U_F_FREE;
- flist_add(&io_u->list, &td->io_u_freelist);
- }
-
- return 0;
-}
-
-static int switch_ioscheduler(struct thread_data *td)
-{
- char tmp[256], tmp2[128];
- FILE *f;
- int ret;
-
- if (td->io_ops->flags & FIO_DISKLESSIO)
- return 0;
-
- sprintf(tmp, "%s/queue/scheduler", td->sysfs_root);
-
- f = fopen(tmp, "r+");
- if (!f) {
- if (errno == ENOENT) {
- log_err("fio: os or kernel doesn't support IO scheduler"
- " switching\n");
- return 0;
- }
- td_verror(td, errno, "fopen iosched");
- return 1;
- }
-
- /*
- * Set io scheduler.
- */
- ret = fwrite(td->o.ioscheduler, strlen(td->o.ioscheduler), 1, f);
- if (ferror(f) || ret != 1) {
- td_verror(td, errno, "fwrite");
- fclose(f);
- return 1;
- }
-
- rewind(f);
-
- /*
- * Read back and check that the selected scheduler is now the default.
- */
- ret = fread(tmp, 1, sizeof(tmp), f);
- if (ferror(f) || ret < 0) {
- td_verror(td, errno, "fread");
- fclose(f);
- return 1;
- }
-
- sprintf(tmp2, "[%s]", td->o.ioscheduler);
- if (!strstr(tmp, tmp2)) {
- log_err("fio: io scheduler %s not found\n", td->o.ioscheduler);
- td_verror(td, EINVAL, "iosched_switch");
- fclose(f);
- return 1;
- }
-
- fclose(f);
- return 0;
-}
-
-static int keep_running(struct thread_data *td)
-{
- unsigned long long io_done;
-
- if (td->done)
- return 0;
- if (td->o.time_based)
- return 1;
- if (td->o.loops) {
- td->o.loops--;
- return 1;
- }
-
- io_done = td->io_bytes[DDIR_READ] + td->io_bytes[DDIR_WRITE]
- + td->io_skip_bytes;
- if (io_done < td->o.size)
- return 1;
-
- return 0;
-}
-
-static void reset_io_counters(struct thread_data *td)
-{
- td->ts.stat_io_bytes[0] = td->ts.stat_io_bytes[1] = 0;
- td->this_io_bytes[0] = td->this_io_bytes[1] = 0;
- td->zone_bytes = 0;
- td->rate_bytes[0] = td->rate_bytes[1] = 0;
- td->rate_blocks[0] = td->rate_blocks[1] = 0;
-
- td->last_was_sync = 0;
-
- /*
- * reset file done count if we are to start over
- */
- if (td->o.time_based || td->o.loops)
- td->nr_done_files = 0;
-}
-
-void reset_all_stats(struct thread_data *td)
-{
- struct timeval tv;
- int i;
-
- reset_io_counters(td);
-
- for (i = 0; i < 2; i++) {
- td->io_bytes[i] = 0;
- td->io_blocks[i] = 0;
- td->io_issues[i] = 0;
- td->ts.total_io_u[i] = 0;
- }
-
- fio_gettime(&tv, NULL);
- td->ts.runtime[0] = 0;
- td->ts.runtime[1] = 0;
- memcpy(&td->epoch, &tv, sizeof(tv));
- memcpy(&td->start, &tv, sizeof(tv));
-}
-
-static void clear_io_state(struct thread_data *td)
-{
- struct fio_file *f;
- unsigned int i;
-
- reset_io_counters(td);
-
- close_files(td);
- for_each_file(td, f, i)
- fio_file_clear_done(f);
-
- /*
- * Set the same seed to get repeatable runs
- */
- td_fill_rand_seeds(td);
-}
-
/*
 * Run the given command string through the shell via system(3); used
 * for the exec_prerun/exec_postrun options.
 *
 * Returns the system() status, or -1 on allocation or exec failure.
 */
static int exec_string(const char *string)
{
	/* "sh -c " prefix + command + NUL terminator */
	size_t newlen = strlen(string) + strlen("sh -c ") + 1;
	char *str;
	int ret;

	str = malloc(newlen);
	if (!str) {
		log_err("fio: failed allocating exec buffer for <%s>\n",
			string);
		return -1;
	}

	snprintf(str, newlen, "sh -c %s", string);

	ret = system(str);
	if (ret == -1)
		log_err("fio: exec of cmd <%s> failed\n", str);

	free(str);
	return ret;
}
-
-/*
- * Entry point for the thread based jobs. The process based jobs end up
- * here as well, after a little setup.
- */
-static void *thread_main(void *data)
-{
- unsigned long long elapsed;
- struct thread_data *td = data;
- pthread_condattr_t attr;
- int clear_state;
-
- if (!td->o.use_thread) {
- setsid();
- td->pid = getpid();
- } else
- td->pid = gettid();
-
- dprint(FD_PROCESS, "jobs pid=%d started\n", (int) td->pid);
-
- INIT_FLIST_HEAD(&td->io_u_freelist);
- INIT_FLIST_HEAD(&td->io_u_busylist);
- INIT_FLIST_HEAD(&td->io_u_requeues);
- INIT_FLIST_HEAD(&td->io_log_list);
- INIT_FLIST_HEAD(&td->io_hist_list);
- INIT_FLIST_HEAD(&td->verify_list);
- INIT_FLIST_HEAD(&td->trim_list);
- pthread_mutex_init(&td->io_u_lock, NULL);
- td->io_hist_tree = RB_ROOT;
-
- pthread_condattr_init(&attr);
- pthread_cond_init(&td->verify_cond, &attr);
- pthread_cond_init(&td->free_cond, &attr);
-
- td_set_runstate(td, TD_INITIALIZED);
- dprint(FD_MUTEX, "up startup_mutex\n");
- fio_mutex_up(startup_mutex);
- dprint(FD_MUTEX, "wait on td->mutex\n");
- fio_mutex_down(td->mutex);
- dprint(FD_MUTEX, "done waiting on td->mutex\n");
-
- /*
- * the ->mutex mutex is now no longer used, close it to avoid
- * eating a file descriptor
- */
- fio_mutex_remove(td->mutex);
-
- /*
- * A new gid requires privilege, so we need to do this before setting
- * the uid.
- */
- if (td->o.gid != -1U && setgid(td->o.gid)) {
- td_verror(td, errno, "setgid");
- goto err;
- }
- if (td->o.uid != -1U && setuid(td->o.uid)) {
- td_verror(td, errno, "setuid");
- goto err;
- }
-
- /*
- * If we have a gettimeofday() thread, make sure we exclude that
- * thread from this job
- */
- if (td->o.gtod_cpu)
- fio_cpu_clear(&td->o.cpumask, td->o.gtod_cpu);
-
- /*
- * Set affinity first, in case it has an impact on the memory
- * allocations.
- */
- if (td->o.cpumask_set && fio_setaffinity(td->pid, td->o.cpumask) == -1) {
- td_verror(td, errno, "cpu_set_affinity");
- goto err;
- }
-
- /*
- * May alter parameters that init_io_u() will use, so we need to
- * do this first.
- */
- if (init_iolog(td))
- goto err;
-
- if (init_io_u(td))
- goto err;
-
- if (td->o.verify_async && verify_async_init(td))
- goto err;
-
- if (td->ioprio_set) {
- if (ioprio_set(IOPRIO_WHO_PROCESS, 0, td->ioprio) == -1) {
- td_verror(td, errno, "ioprio_set");
- goto err;
- }
- }
-
- if (td->o.cgroup_weight && cgroup_setup(td, cgroup_list, &cgroup_mnt))
- goto err;
-
- if (nice(td->o.nice) == -1) {
- td_verror(td, errno, "nice");
- goto err;
- }
-
- if (td->o.ioscheduler && switch_ioscheduler(td))
- goto err;
-
- if (!td->o.create_serialize && setup_files(td))
- goto err;
-
- if (td_io_init(td))
- goto err;
-
- if (init_random_map(td))
- goto err;
-
- if (td->o.exec_prerun) {
- if (exec_string(td->o.exec_prerun))
- goto err;
- }
-
- if (td->o.pre_read) {
- if (pre_read_files(td) < 0)
- goto err;
- }
-
- fio_gettime(&td->epoch, NULL);
- getrusage(RUSAGE_SELF, &td->ts.ru_start);
-
- clear_state = 0;
- while (keep_running(td)) {
- fio_gettime(&td->start, NULL);
- memcpy(&td->ts.stat_sample_time[0], &td->start,
- sizeof(td->start));
- memcpy(&td->ts.stat_sample_time[1], &td->start,
- sizeof(td->start));
- memcpy(&td->tv_cache, &td->start, sizeof(td->start));
-
- if (td->o.ratemin[0] || td->o.ratemin[1])
- memcpy(&td->lastrate, &td->ts.stat_sample_time,
- sizeof(td->lastrate));
-
- if (clear_state)
- clear_io_state(td);
-
- prune_io_piece_log(td);
-
- do_io(td);
-
- clear_state = 1;
-
- if (td_read(td) && td->io_bytes[DDIR_READ]) {
- elapsed = utime_since_now(&td->start);
- td->ts.runtime[DDIR_READ] += elapsed;
- }
- if (td_write(td) && td->io_bytes[DDIR_WRITE]) {
- elapsed = utime_since_now(&td->start);
- td->ts.runtime[DDIR_WRITE] += elapsed;
- }
-
- if (td->error || td->terminate)
- break;
-
- if (!td->o.do_verify ||
- td->o.verify == VERIFY_NONE ||
- (td->io_ops->flags & FIO_UNIDIR))
- continue;
-
- clear_io_state(td);
-
- fio_gettime(&td->start, NULL);
-
- do_verify(td);
-
- td->ts.runtime[DDIR_READ] += utime_since_now(&td->start);
-
- if (td->error || td->terminate)
- break;
- }
-
- update_rusage_stat(td);
- td->ts.runtime[0] = (td->ts.runtime[0] + 999) / 1000;
- td->ts.runtime[1] = (td->ts.runtime[1] + 999) / 1000;
- td->ts.total_run_time = mtime_since_now(&td->epoch);
- td->ts.io_bytes[0] = td->io_bytes[0];
- td->ts.io_bytes[1] = td->io_bytes[1];
-
- fio_mutex_down(writeout_mutex);
- if (td->ts.bw_log) {
- if (td->o.bw_log_file) {
- finish_log_named(td, td->ts.bw_log,
- td->o.bw_log_file, "bw");
- } else
- finish_log(td, td->ts.bw_log, "bw");
- }
- if (td->ts.lat_log) {
- if (td->o.lat_log_file) {
- finish_log_named(td, td->ts.lat_log,
- td->o.lat_log_file, "lat");
- } else
- finish_log(td, td->ts.lat_log, "lat");
- }
- if (td->ts.slat_log) {
- if (td->o.lat_log_file) {
- finish_log_named(td, td->ts.slat_log,
- td->o.lat_log_file, "slat");
- } else
- finish_log(td, td->ts.slat_log, "slat");
- }
- if (td->ts.clat_log) {
- if (td->o.lat_log_file) {
- finish_log_named(td, td->ts.clat_log,
- td->o.lat_log_file, "clat");
- } else
- finish_log(td, td->ts.clat_log, "clat");
- }
- fio_mutex_up(writeout_mutex);
- if (td->o.exec_postrun)
- exec_string(td->o.exec_postrun);
-
- if (exitall_on_terminate)
- terminate_threads(td->groupid);
-
-err:
- if (td->error)
- log_info("fio: pid=%d, err=%d/%s\n", (int) td->pid, td->error,
- td->verror);
-
- if (td->o.verify_async)
- verify_async_exit(td);
-
- close_and_free_files(td);
- close_ioengine(td);
- cleanup_io_u(td);
- cgroup_shutdown(td, &cgroup_mnt);
-
- if (td->o.cpumask_set) {
- int ret = fio_cpuset_exit(&td->o.cpumask);
-
- td_verror(td, ret, "fio_cpuset_exit");
- }
-
- /*
- * do this very late, it will log file closing as well
- */
- if (td->o.write_iolog_file)
- write_iolog_close(td);
-
- options_mem_free(td);
- td_set_runstate(td, TD_EXITED);
- return (void *) (unsigned long) td->error;
-}
-
-/*
- * We cannot pass the td data into a forked process, so attach the td and
- * pass it to the thread worker.
- */
-static int fork_main(int shmid, int offset)
-{
- struct thread_data *td;
- void *data, *ret;
-
-#ifndef __hpux
- data = shmat(shmid, NULL, 0);
- if (data == (void *) -1) {
- int __err = errno;
-
- perror("shmat");
- return __err;
- }