#!/bin/sh
GVF=FIO-VERSION-FILE
-DEF_VER=fio-2.18
+DEF_VER=fio-2.19
LF='
'
If the option accepts an upper and lower range, use a colon ':' or
minus '-' to separate such values. See :ref:`irange <irange>`.
+ If the lower value specified happens to be larger than the upper value,
+ the two values are swapped.
.. _bool:
blocks given. For example, if you give 32 as a parameter, fio will sync the
file for every 32 writes issued. If fio is using non-buffered I/O, we may
not sync the file. The exception is the sg I/O engine, which synchronizes
- the disk cache anyway.
+ the disk cache anyway. Defaults to 0, which means fio does not periodically
+ issue and wait for a sync of the file.
.. option:: fdatasync=int
Like :option:`fsync` but uses :manpage:`fdatasync(2)` to only sync data and
not metadata blocks. In Windows, FreeBSD, and DragonFlyBSD there is no
:manpage:`fdatasync(2)`, this falls back to using :manpage:`fsync(2)`.
+ Defaults to 0, which means fio does not periodically sync data to disk.
.. option:: write_barrier=int
**mmapshared**
Same as mmap, but use a MMAP_SHARED mapping.
+ **cudamalloc**
+ Use GPU memory as the buffers for GPUDirect RDMA benchmark.
+
The area allocated is a function of the maximum allowed bs size for the job,
multiplied by the I/O depth given. Note that for **shmhuge** and
**mmaphuge** to work, the system must have free huge pages allocated. This
**sync**
Basic :manpage:`read(2)` or :manpage:`write(2)`
I/O. :manpage:`lseek(2)` is used to position the I/O location.
+ See :option:`fsync` and :option:`fdatasync` for syncing write I/Os.
**psync**
Basic :manpage:`pread(2)` or :manpage:`pwrite(2)` I/O. Default on
DDIR_TRIM
does fallocate(,mode = FALLOC_FL_KEEP_SIZE|FALLOC_FL_PUNCH_HOLE).
+ **ftruncate**
+ I/O engine that sends :manpage:`ftruncate(2)` operations in response
+ to write (DDIR_WRITE) events. Each ftruncate issued sets the file's
+ size to the current block offset. Block size is ignored.
+
**e4defrag**
I/O engine that does regular EXT4_IOC_MOVE_EXT ioctls to simulate
defragment activity in request to DDIR_WRITE event.
.. option:: cpuload=int : [cpuio]
- Attempt to use the specified percentage of CPU cycles.
+ Attempt to use the specified percentage of CPU cycles. This is a mandatory
+ option when using cpuio I/O engine.
.. option:: cpuchunks=int : [cpuio]
eta.c verify.c memory.c io_u.c parse.c mutex.c options.c \
smalloc.c filehash.c profile.c debug.c engines/cpu.c \
engines/mmap.c engines/sync.c engines/null.c engines/net.c \
+ engines/ftruncate.c \
server.c client.c iolog.c backend.c libfio.c flow.c cconv.c \
gettime-thread.c helpers.c json.c idletime.c td_error.c \
profiles/tiobench.c profiles/act.c io_u_queue.c filelock.c \
uint64_t val;
iops = bps / td->o.bs[ddir];
val = (int64_t) (1000000 / iops) *
- -logf(__rand_0_1(&td->poisson_state));
+ -logf(__rand_0_1(&td->poisson_state[ddir]));
if (val) {
- dprint(FD_RATE, "poisson rate iops=%llu\n",
- (unsigned long long) 1000000 / val);
+ dprint(FD_RATE, "poisson rate iops=%llu, ddir=%d\n",
+ (unsigned long long) 1000000 / val,
+ ddir);
}
- td->last_usec += val;
- return td->last_usec;
+ td->last_usec[ddir] += val;
+ return td->last_usec[ddir];
} else if (bps) {
secs = bytes / bps;
remainder = bytes % bps;
struct thread_data *td = fd->td;
struct thread_options *o = &td->o;
struct sk_out *sk_out = fd->sk_out;
+ uint64_t bytes_done[DDIR_RWDIR_CNT];
int deadlock_loop_cnt;
int clear_state;
int ret;
sizeof(td->bw_sample_time));
}
+ memset(bytes_done, 0, sizeof(bytes_done));
clear_state = 0;
+
while (keep_running(td)) {
uint64_t verify_bytes;
if (td->o.verify_only && td_write(td))
verify_bytes = do_dry_run(td);
else {
- uint64_t bytes_done[DDIR_RWDIR_CNT];
-
do_io(td, bytes_done);
if (!ddir_rw_sum(bytes_done)) {
break;
}
+ /*
+ * If td ended up with no I/O when it should have performed some, then
+ * something went wrong, unless the ioengine is flagged FIO_NOIO or
+ * FIO_DISKLESSIO. (Are there other flags that should be ignored here?)
+ */
+ if ((td->o.size || td->o.io_size) && !ddir_rw_sum(bytes_done) &&
+ !(td_ioengine_flagged(td, FIO_NOIO) ||
+ td_ioengine_flagged(td, FIO_DISKLESSIO)))
+ log_err("%s: No I/O performed by %s, "
+ "perhaps try --debug=io option for details?\n",
+ td->o.name, td->io_ops->name);
+
td_set_runstate(td, TD_FINISHING);
update_rusage_stat(td);
if (o->write_iolog_file)
write_iolog_close(td);
- fio_mutex_remove(td->mutex);
- td->mutex = NULL;
-
td_set_runstate(td, TD_EXITED);
/*
return (void *) (uintptr_t) td->error;
}
-static void dump_td_info(struct thread_data *td)
-{
- log_err("fio: job '%s' (state=%d) hasn't exited in %lu seconds, it "
- "appears to be stuck. Doing forceful exit of this job.\n",
- td->o.name, td->runstate,
- (unsigned long) time_since_now(&td->terminate_time));
-}
-
/*
* Run over the job map and reap the threads that have exited, if any.
*/
if (td->terminate &&
td->runstate < TD_FSYNCING &&
time_since_now(&td->terminate_time) >= FIO_REAP_TIMEOUT) {
- dump_td_info(td);
+ log_err("fio: job '%s' (state=%d) hasn't exited in "
+ "%lu seconds, it appears to be stuck. Doing "
+ "forceful exit of this job.\n",
+ td->o.name, td->runstate,
+ (unsigned long) time_since_now(&td->terminate_time));
td_set_runstate(td, TD_REAPED);
goto reaped;
}
fio_mutex_remove(td->rusage_sem);
td->rusage_sem = NULL;
}
+ fio_mutex_remove(td->mutex);
+ td->mutex = NULL;
}
free_disk_util();
o->new_group = le32_to_cpu(top->new_group);
o->numjobs = le32_to_cpu(top->numjobs);
o->cpus_allowed_policy = le32_to_cpu(top->cpus_allowed_policy);
+ o->gpu_dev_id = le32_to_cpu(top->gpu_dev_id);
o->iolog = le32_to_cpu(top->iolog);
o->rwmixcycle = le32_to_cpu(top->rwmixcycle);
o->nice = le32_to_cpu(top->nice);
top->new_group = cpu_to_le32(o->new_group);
top->numjobs = cpu_to_le32(o->numjobs);
top->cpus_allowed_policy = cpu_to_le32(o->cpus_allowed_policy);
+ top->gpu_dev_id = cpu_to_le32(o->gpu_dev_id);
top->iolog = cpu_to_le32(o->iolog);
top->rwmixcycle = cpu_to_le32(o->rwmixcycle);
top->nice = cpu_to_le32(o->nice);
;;
--build-static) build_static="yes"
;;
- --enable-gfio)
- gfio_check="yes"
+ --enable-gfio) gfio_check="yes"
;;
--disable-numa) disable_numa="yes"
;;
+ --disable-rdma) disable_rdma="yes"
+ ;;
--disable-rbd) disable_rbd="yes"
;;
--disable-rbd-blkin) disable_rbd_blkin="yes"
;;
--disable-pmem) disable_pmem="yes"
;;
+ --enable-cuda) enable_cuda="yes"
+ ;;
--help)
show_help="yes"
;;
echo "--esx Configure build options for esx"
echo "--enable-gfio Enable building of gtk gfio"
echo "--disable-numa Disable libnuma even if found"
+ echo "--disable-rdma Disable RDMA support even if found"
echo "--disable-gfapi Disable gfapi"
echo "--enable-libhdfs Enable hdfs support"
echo "--disable-lex Disable use of lex/yacc for math"
echo "--enable-lex Enable use of lex/yacc for math"
echo "--disable-shm Disable SHM support"
echo "--disable-optimizations Don't enable compiler optimizations"
+ echo "--enable-cuda Enable GPUDirect RDMA support"
exit $exit_val
fi
# cross-compiling to one of these OSes then you'll need to specify
# the correct CPU with the --cpu option.
case $targetos in
-AIX)
+AIX|OpenBSD)
# Unless explicitly enabled, turn off lex.
+ # OpenBSD will hit a syntax error if lex is enabled.
if test -z "$disable_lex" ; then
disable_lex="yes"
else
##########################################
# POSIX pshared attribute probe
-posix_pshared="no"
+if test "$posix_pshared" != "yes" ; then
+ posix_pshared="no"
+fi
cat > $TMPC <<EOF
#include <unistd.h>
int main(void)
return 0;
}
EOF
-if compile_prog "" "-libverbs" "libverbs" ; then
+if test "$disable_rdma" != "yes" && compile_prog "" "-libverbs" "libverbs" ; then
libverbs="yes"
LIBS="-libverbs $LIBS"
fi
return 0;
}
EOF
-if compile_prog "" "-lrdmacm" "rdma"; then
+if test "$disable_rdma" != "yes" && compile_prog "" "-lrdmacm" "rdma"; then
rdmacm="yes"
LIBS="-lrdmacm $LIBS"
fi
fi
cat > $TMPC << EOF
#include <time.h>
+#include <string.h>
int main(int argc, char **argv)
{
volatile clockid_t cid;
- memset(&cid, 0, sizeof(cid));
+ memset((void*)&cid, 0, sizeof(cid));
return 0;
}
EOF
gdk_threads_enter();
gdk_threads_leave();
- printf("%d", GTK_CHECK_VERSION(2, 18, 0));
+ return GTK_CHECK_VERSION(2, 18, 0) ? 0 : 1; /* 0 on success */
}
EOF
GTK_CFLAGS=$(pkg-config --cflags gtk+-2.0 gthread-2.0)
exit 1
fi
if compile_prog "$GTK_CFLAGS" "$GTK_LIBS" "gfio" ; then
- r=$($TMPE)
- if test "$r" != "0" ; then
+ $TMPE
+ if test "$?" = "0" ; then
gfio="yes"
GFIO_LIBS="$LIBS $GTK_LIBS"
CFLAGS="$CFLAGS $GTK_CFLAGS"
echo "gtk 2.18 or higher $gfio"
fi
+##########################################
# Check whether we have getrusage(RUSAGE_THREAD)
if test "$rusage_thread" != "yes" ; then
rusage_thread="no"
int main(int argc, char **argv)
{
-
rados_t cluster;
rados_ioctx_t io_ctx;
const char pool[] = "rbd";
fi
echo "setvbuf $setvbuf"
+##########################################
# check for gfapi
if test "$gfapi" != "yes" ; then
gfapi="no"
int main(int argc, char **argv)
{
-
glfs_t *g = glfs_new("foo");
return 0;
# Report whether dev-dax engine is enabled
echo "NVML dev-dax engine $devdax"
+##########################################
# Check if we have lex/yacc available
yacc="no"
yacc_is_bison="no"
fi
echo "march_armv8_a_crc_crypto $march_armv8_a_crc_crypto"
+##########################################
+# cuda probe
+if test "$cuda" != "yes" ; then
+ cuda="no"
+fi
+cat > $TMPC << EOF
+#include <cuda.h>
+int main(int argc, char **argv)
+{
+ return cuInit(0);
+}
+EOF
+if test "$enable_cuda" = "yes" && compile_prog "" "-lcuda" "cuda"; then
+ cuda="yes"
+ LIBS="-lcuda $LIBS"
+fi
+echo "cuda $cuda"
#############################################################################
output_sym "CONFIG_RUSAGE_THREAD"
fi
if test "$gfio" = "yes" ; then
- echo "CONFIG_GFIO=y" >> $config_host_mak
+ output_sym "CONFIG_GFIO"
fi
if test "$esx" = "yes" ; then
output_sym "CONFIG_ESX"
if test "$disable_opt" = "yes" ; then
output_sym "CONFIG_DISABLE_OPTIMIZATIONS"
fi
-
if test "$zlib" = "no" ; then
echo "Consider installing zlib-dev (zlib-devel), some fio features depend on it."
fi
+if test "$cuda" = "yes" ; then
+ output_sym "CONFIG_CUDA"
+fi
echo "LIBS+=$LIBS" >> $config_host_mak
echo "GFIO_LIBS+=$GFIO_LIBS" >> $config_host_mak
len = io_u->xfer_buflen;
if (len != io_u->xfer_buflen) {
- io_u->resid = io_u->xfer_buflen - len;
- io_u->error = 0;
+ if (len) {
+ io_u->resid = io_u->xfer_buflen - len;
+ io_u->error = 0;
+ } else {
+ /* access beyond i_size */
+ io_u->error = EINVAL;
+ }
}
if (ret)
io_u->error = errno;
--- /dev/null
+/*
+ * ftruncate: ioengine for git://git.kernel.dk/fio.git
+ *
+ * IO engine that does regular truncates to simulate data transfer
+ * as fio ioengine.
+ * DDIR_WRITE does ftruncate
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/uio.h>
+#include <errno.h>
+#include <assert.h>
+#include <fcntl.h>
+
+#include "../fio.h"
+#include "../filehash.h"
+
+/*
+ * Queue handler: maps each DDIR_WRITE event to an ftruncate(2) call that
+ * sets the file's size to the io_u's current offset. The engine is
+ * synchronous (FIO_SYNCIO) and performs no data transfer (FIO_FAKEIO),
+ * so every io_u completes inline.
+ */
+static int fio_ftruncate_queue(struct thread_data *td, struct io_u *io_u)
+{
+ struct fio_file *f = io_u->file;
+ int ret;
+ fio_ro_check(td, io_u);
+
+ /* Only write events make sense for truncation; reject anything else. */
+ if (io_u->ddir != DDIR_WRITE) {
+ io_u->error = EINVAL;
+ return FIO_Q_COMPLETED;
+ }
+ /* Resize the file to the current block offset; block size is ignored. */
+ ret = ftruncate(f->fd, io_u->offset);
+
+ if (ret)
+ io_u->error = errno;
+
+ return FIO_Q_COMPLETED;
+}
+
+static struct ioengine_ops ioengine = {
+ .name = "ftruncate",
+ .version = FIO_IOOPS_VERSION,
+ .queue = fio_ftruncate_queue,
+ .open_file = generic_open_file,
+ .close_file = generic_close_file,
+ .get_file_size = generic_get_file_size,
+ .flags = FIO_SYNCIO | FIO_FAKEIO
+};
+
+/* Constructor/destructor hooks register the engine with fio at load time. */
+static void fio_init fio_syncio_register(void)
+{
+ register_ioengine(&ioengine);
+}
+
+static void fio_exit fio_syncio_unregister(void)
+{
+ unregister_ioengine(&ioengine);
+}
*/
ret = rbd_aio_get_return_value(fri->completion);
if (ret < 0) {
- io_u->error = ret;
+ io_u->error = -ret;
io_u->resid = io_u->xfer_buflen;
} else
io_u->error = 0;
failed_comp:
rbd_aio_release(fri->completion);
failed:
- io_u->error = r;
+ io_u->error = -r;
td_verror(td, io_u->error, "xfer");
return FIO_Q_COMPLETED;
}
--- /dev/null
+# Example ftruncate engine jobs
+
+[global]
+ioengine=ftruncate
+directory=/scratch
+size=102404k ; 100Mb+4k
+stonewall
+filename=truncate
+runtime=10s
+time_based
+direct=1
+#
+# The bs option is a stub here: truncation is performed at the current block
+# offset, and the blocksize value is ignored.
+bs=4k
+
+# truncate the file to 4Kbytes then repeatedly grow the file back to just over
+# its original size using subsequent truncates
+[grow-truncate]
+rw=write
+
+# Repeatedly change a file to a random size between 0Kbytes and 100Mb
+# using truncates
+[rand-truncate]
+rw=randwrite
+norandommap
+
--- /dev/null
+# Example gpudirect rdma client job
+[global]
+ioengine=rdma
+hostname=[hostname]
+port=[port]
+verb=[read/write/send/recv]
+mem=cudamalloc
+gpu_dev_id=0
+bs=1m
+size=100g
+
+[sender]
+rw=write
+iodepth=1
+iodepth_batch_complete=1
--- /dev/null
+# Example rdma server job
+[global]
+ioengine=rdma
+port=[port]
+mem=cudamalloc
+gpu_dev_id=0
+bs=1m
+size=100g
+
+[receiver]
+rw=read
+iodepth=16
}
}
- b = malloc(td->o.max_bs[DDIR_WRITE]);
-
left = f->real_file_size;
+ bs = td->o.max_bs[DDIR_WRITE];
+ if (bs > left)
+ bs = left;
+
+ b = malloc(bs);
+ if (!b) {
+ td_verror(td, errno, "malloc");
+ goto err;
+ }
+
while (left && !td->terminate) {
- bs = td->o.max_bs[DDIR_WRITE];
if (bs > left)
bs = left;
unsigned int bs;
char *b;
- if (td_ioengine_flagged(td, FIO_PIPEIO))
+ if (td_ioengine_flagged(td, FIO_PIPEIO) ||
+ td_ioengine_flagged(td, FIO_NOIO))
+ return 0;
+
+ if (f->filetype == FIO_TYPE_CHAR)
return 0;
if (!fio_file_open(f)) {
old_runstate = td_bump_runstate(td, TD_PRE_READING);
+ left = f->io_size;
bs = td->o.max_bs[DDIR_READ];
+ if (bs > left)
+ bs = left;
+
b = malloc(bs);
+ if (!b) {
+ td_verror(td, errno, "malloc");
+ ret = 1;
+ goto error;
+ }
memset(b, 0, bs);
if (lseek(f->fd, f->file_offset, SEEK_SET) < 0) {
goto error;
}
- left = f->io_size;
-
while (left && !td->terminate) {
if (bs > left)
bs = left;
}
/*
- * We normally don't come here, but if the result is 0,
- * set it to the real file size. This could be size of
- * the existing one if it already exists, but otherwise
- * will be set to 0. A new file won't be created because
+ * We normally don't come here for regular files, but
+ * if the result is 0 for a regular file, set it to the
+ * real file size. This could be size of the existing
+ * one if it already exists, but otherwise will be set
+ * to 0. A new file won't be created because
* ->io_size + ->file_offset equals ->real_file_size.
*/
if (!f->io_size) {
dprint(FD_FILE, "pre_read files\n");
for_each_file(td, f, i) {
- pre_read_file(td, f);
+ if (pre_read_file(td, f))
+ return -1;
}
- return 1;
+ return 0;
}
static int __init_rand_distribution(struct thread_data *td, struct fio_file *f)
.TP
.B mmapshared
Same as \fBmmap\fR, but use a MMAP_SHARED mapping.
+.TP
+.B cudamalloc
+Use GPU memory as the buffers for GPUDirect RDMA benchmark. The ioengine must be \fBrdma\fR.
.RE
.P
The amount of memory allocated is the maximum allowed \fBblocksize\fR for the
#include "debug.h"
#include "file.h"
#include "io_ddir.h"
-#include "ioengine.h"
+#include "ioengines.h"
#include "iolog.h"
#include "helpers.h"
#include "options.h"
#include "oslib/getopt.h"
#include "lib/rand.h"
#include "lib/rbtree.h"
+#include "lib/num2str.h"
#include "client.h"
#include "server.h"
#include "stat.h"
#include "flow.h"
+#include "io_u.h"
#include "io_u_queue.h"
#include "workqueue.h"
#include "steadystate.h"
#define MPOL_LOCAL MPOL_MAX
#endif
+#ifdef CONFIG_CUDA
+#include <cuda.h>
+#endif
+
/*
* offset generator types
*/
TD_F_VER_NONE = 1U << 5,
TD_F_PROFILE_OPS = 1U << 6,
TD_F_COMPRESS = 1U << 7,
- TD_F_NOIO = 1U << 8,
+ TD_F_RESERVED = 1U << 8, /* not used */
TD_F_COMPRESS_LOG = 1U << 9,
TD_F_VSTATE_SAVED = 1U << 10,
TD_F_NEED_LOCK = 1U << 11,
FIO_DEDUPE_OFF,
FIO_RAND_POISSON_OFF,
FIO_RAND_ZONE_OFF,
+ FIO_RAND_POISSON2_OFF,
+ FIO_RAND_POISSON3_OFF,
FIO_RAND_NR_OFFS,
};
* Per-thread/process specific data. Only used for the network client
* for now.
*/
-struct sk_out;
void sk_out_assign(struct sk_out *);
void sk_out_drop(void);
* to any of the available IO engines.
*/
struct ioengine_ops *io_ops;
+ int io_ops_init;
/*
* IO engine private data and dlhandle.
unsigned long rate_blocks[DDIR_RWDIR_CNT];
unsigned long long rate_io_issue_bytes[DDIR_RWDIR_CNT];
struct timeval lastrate[DDIR_RWDIR_CNT];
- int64_t last_usec;
- struct frand_state poisson_state;
+ int64_t last_usec[DDIR_RWDIR_CNT];
+ struct frand_state poisson_state[DDIR_RWDIR_CNT];
/*
* Enforced rate submission/completion workqueue
struct steadystate_data ss;
char verror[FIO_VERROR_SIZE];
+
+#ifdef CONFIG_CUDA
+ /*
+ * for GPU memory management
+ */
+ int gpu_dev_cnt;
+ int gpu_dev_id;
+ CUdevice cu_dev;
+ CUcontext cu_ctx;
+ CUdeviceptr dev_mem_ptr;
+#endif
+
};
/*
extern void td_fill_rand_seeds(struct thread_data *);
extern void td_fill_verify_state_seed(struct thread_data *);
extern void add_job_opts(const char **, int);
-extern char *num2str(uint64_t, int, int, int, int);
extern int ioengine_load(struct thread_data *);
extern bool parse_dryrun(void);
extern int fio_running_or_pending_io_threads(void);
extern int initialize_fio(char *envp[]);
extern void deinitialize_fio(void);
-#define N2S_NONE 0
-#define N2S_BITPERSEC 1 /* match unit_base for bit rates */
-#define N2S_PERSEC 2
-#define N2S_BIT 3
-#define N2S_BYTE 4
-#define N2S_BYTEPERSEC 8 /* match unit_base for byte rates */
-
#define FIO_GETOPT_JOB 0x89000000
#define FIO_GETOPT_IOENGINE 0x98000000
#define FIO_NR_OPTIONS (FIO_MAX_OPTS + 128)
static inline void td_set_ioengine_flags(struct thread_data *td)
{
-	td->flags |= (td->io_ops->flags << TD_ENG_FLAG_SHIFT);
+	/*
+	 * Replace — rather than OR in — the engine-flag bit range, so any
+	 * stale flags left behind by a previously loaded engine are cleared
+	 * before the current engine's flags are set.
+	 */
+	td->flags = (~(TD_ENG_FLAG_MASK << TD_ENG_FLAG_SHIFT) & td->flags) |
+		(td->io_ops->flags << TD_ENG_FLAG_SHIFT);
}
static inline bool td_ioengine_flagged(struct thread_data *td,
copy_opt_list(td, parent);
td->io_ops = NULL;
+ td->io_ops_init = 0;
if (!preserve_eo)
td->eo = NULL;
td->rate_next_io_time[ddir] = 0;
td->rate_io_issue_bytes[ddir] = 0;
- td->last_usec = 0;
+ td->last_usec[ddir] = 0;
return 0;
}
}
if (o->pre_read) {
- if (o->invalidate_cache) {
- log_info("fio: ignore invalidate option for %s\n",
- o->name);
+ if (o->invalidate_cache)
o->invalidate_cache = 0;
- }
if (td_ioengine_flagged(td, FIO_PIPEIO)) {
log_info("fio: cannot pre-read files with an IO engine"
" that isn't seekable. Pre-read disabled.\n");
init_rand_seed(&td->file_size_state, td->rand_seeds[FIO_RAND_FILE_SIZE_OFF], use64);
init_rand_seed(&td->trim_state, td->rand_seeds[FIO_RAND_TRIM_OFF], use64);
init_rand_seed(&td->delay_state, td->rand_seeds[FIO_RAND_START_DELAY], use64);
- init_rand_seed(&td->poisson_state, td->rand_seeds[FIO_RAND_POISSON_OFF], 0);
+ init_rand_seed(&td->poisson_state[0], td->rand_seeds[FIO_RAND_POISSON_OFF], 0);
+ init_rand_seed(&td->poisson_state[1], td->rand_seeds[FIO_RAND_POISSON2_OFF], 0);
+ init_rand_seed(&td->poisson_state[2], td->rand_seeds[FIO_RAND_POISSON3_OFF], 0);
init_rand_seed(&td->dedupe_state, td->rand_seeds[FIO_DEDUPE_OFF], false);
init_rand_seed(&td->zone_state, td->rand_seeds[FIO_RAND_ZONE_OFF], false);
if (o->verify_async || o->io_submit_mode == IO_MODE_OFFLOAD)
td->flags |= TD_F_NEED_LOCK;
+
+ if (o->mem_type == MEM_CUDA_MALLOC)
+ td->flags &= ~TD_F_SCRAMBLE_BUFFERS;
}
static int setup_random_seeds(struct thread_data *td)
if (!o->filename_format || !strlen(o->filename_format)) {
sprintf(buf, "%s.%d.%d", jobname, jobnum, filenum);
- return NULL;
+ return buf;
}
for (f = &fpre_keywords[0]; f->keyword; f++)
ddir_str(o->td_ddir));
if (o->bs_is_seq_rand)
- log_info("bs=%s-%s,%s-%s, bs_is_seq_rand, ",
+ log_info("bs=(R) %s-%s, (W) %s-%s, bs_is_seq_rand, ",
c1, c2, c3, c4);
else
- log_info("bs=%s-%s,%s-%s,%s-%s, ",
+ log_info("bs=(R) %s-%s, (W) %s-%s, (T) %s-%s, ",
c1, c2, c3, c4, c5, c6);
log_info("ioengine=%s, iodepth=%u\n",
unsigned int buflen = 0;
unsigned int minbs, maxbs;
uint64_t frand_max, r;
+ bool power_2;
assert(ddir_rw(ddir));
}
}
- if (!td->o.bs_unaligned && is_power_of_2(minbs))
+ power_2 = is_power_of_2(minbs);
+ if (!td->o.bs_unaligned && power_2)
buflen &= ~(minbs - 1);
-
+ else if (!td->o.bs_unaligned && !power_2)
+ buflen -= buflen % minbs;
} while (!io_u_fits(td, io_u, buflen));
return buflen;
enum fio_ddir ddir;
/*
- * see if it's time to fsync
- */
- if (td->o.fsync_blocks &&
- !(td->io_issues[DDIR_WRITE] % td->o.fsync_blocks) &&
- td->io_issues[DDIR_WRITE] && should_fsync(td))
- return DDIR_SYNC;
-
- /*
- * see if it's time to fdatasync
- */
- if (td->o.fdatasync_blocks &&
- !(td->io_issues[DDIR_WRITE] % td->o.fdatasync_blocks) &&
- td->io_issues[DDIR_WRITE] && should_fsync(td))
- return DDIR_DATASYNC;
-
- /*
- * see if it's time to sync_file_range
+ * See if it's time to fsync/fdatasync/sync_file_range first,
+ * and if not then move on to check regular I/Os.
*/
- if (td->sync_file_range_nr &&
- !(td->io_issues[DDIR_WRITE] % td->sync_file_range_nr) &&
- td->io_issues[DDIR_WRITE] && should_fsync(td))
- return DDIR_SYNC_FILE_RANGE;
+ if (should_fsync(td)) {
+ if (td->o.fsync_blocks && td->io_issues[DDIR_WRITE] &&
+ !(td->io_issues[DDIR_WRITE] % td->o.fsync_blocks))
+ return DDIR_SYNC;
+
+ if (td->o.fdatasync_blocks && td->io_issues[DDIR_WRITE] &&
+ !(td->io_issues[DDIR_WRITE] % td->o.fdatasync_blocks))
+ return DDIR_DATASYNC;
+
+ if (td->sync_file_range_nr && td->io_issues[DDIR_WRITE] &&
+ !(td->io_issues[DDIR_WRITE] % td->sync_file_range_nr))
+ return DDIR_SYNC_FILE_RANGE;
+ }
if (td_rw(td)) {
/*
}
if (io_u->offset + io_u->buflen > io_u->file->real_file_size) {
- dprint(FD_IO, "io_u %p, offset too large\n", io_u);
- dprint(FD_IO, " off=%llu/%lu > %llu\n",
+ dprint(FD_IO, "io_u %p, offset + buflen exceeds file size\n",
+ io_u);
+ dprint(FD_IO, " offset=%llu/buflen=%lu > %llu\n",
(unsigned long long) io_u->offset, io_u->buflen,
(unsigned long long) io_u->file->real_file_size);
return 1;
if (!td_io_prep(td, io_u)) {
if (!td->o.disable_lat)
fio_gettime(&io_u->start_time, NULL);
+
if (do_scramble)
small_content_scramble(io_u);
+
return io_u;
}
err_put:
{
struct thread_options *o = &td->o;
+ if (o->mem_type == MEM_CUDA_MALLOC)
+ return;
+
if (o->compress_percentage || o->dedupe_percentage) {
unsigned int perc = td->o.compress_percentage;
struct frand_state *rs;
io_u->buf_filled_len = 0;
fill_io_buffer(td, io_u->buf, min_write, max_bs);
}
+
+/*
+ * Issue sync_file_range(2) over the span written so far on this file
+ * (first_write .. last_write). Returns 0 when nothing has been written,
+ * otherwise the raw sync_file_range() return value.
+ */
+static int do_sync_file_range(const struct thread_data *td,
+ struct fio_file *f)
+{
+ off64_t offset, nbytes;
+
+ offset = f->first_write;
+ nbytes = f->last_write - f->first_write;
+
+ if (!nbytes)
+ return 0;
+
+ return sync_file_range(f->fd, offset, nbytes, td->o.sync_file_range);
+}
+
+/*
+ * Execute a sync-type io_u (DDIR_SYNC / DDIR_DATASYNC / DDIR_SYNC_FILE_RANGE)
+ * synchronously. On a negative return, errno is stored in io_u->error.
+ * Unsupported directions (or fdatasync without CONFIG_FDATASYNC) set EINVAL.
+ */
+int do_io_u_sync(const struct thread_data *td, struct io_u *io_u)
+{
+ int ret;
+
+ if (io_u->ddir == DDIR_SYNC) {
+ ret = fsync(io_u->file->fd);
+ } else if (io_u->ddir == DDIR_DATASYNC) {
+#ifdef CONFIG_FDATASYNC
+ ret = fdatasync(io_u->file->fd);
+#else
+ ret = io_u->xfer_buflen;
+ io_u->error = EINVAL;
+#endif
+ } else if (io_u->ddir == DDIR_SYNC_FILE_RANGE)
+ ret = do_sync_file_range(td, io_u->file);
+ else {
+ ret = io_u->xfer_buflen;
+ io_u->error = EINVAL;
+ }
+
+ if (ret < 0)
+ io_u->error = errno;
+
+ return ret;
+}
+
+/*
+ * Execute a trim io_u via the OS trim primitive. Returns the number of
+ * bytes trimmed on success, 0 on failure (with io_u->error set). Without
+ * FIO_HAVE_TRIM support the operation is rejected with EINVAL.
+ */
+int do_io_u_trim(const struct thread_data *td, struct io_u *io_u)
+{
+#ifndef FIO_HAVE_TRIM
+ io_u->error = EINVAL;
+ return 0;
+#else
+ struct fio_file *f = io_u->file;
+ int ret;
+
+ ret = os_trim(f->fd, io_u->offset, io_u->xfer_buflen);
+ if (!ret)
+ return io_u->xfer_buflen;
+
+ io_u->error = ret;
+ return 0;
+#endif
+}
--- /dev/null
+#ifndef FIO_IO_U
+#define FIO_IO_U
+
+#include "compiler/compiler.h"
+#include "os/os.h"
+#include "log.h"
+#include "io_ddir.h"
+#include "debug.h"
+#include "file.h"
+#include "workqueue.h"
+
+#ifdef CONFIG_LIBAIO
+#include <libaio.h>
+#endif
+#ifdef CONFIG_GUASI
+#include <guasi.h>
+#endif
+
+enum {
+ IO_U_F_FREE = 1 << 0,
+ IO_U_F_FLIGHT = 1 << 1,
+ IO_U_F_NO_FILE_PUT = 1 << 2,
+ IO_U_F_IN_CUR_DEPTH = 1 << 3,
+ IO_U_F_BUSY_OK = 1 << 4,
+ IO_U_F_TRIMMED = 1 << 5,
+ IO_U_F_BARRIER = 1 << 6,
+ IO_U_F_VER_LIST = 1 << 7,
+};
+
+/*
+ * The io unit
+ */
+struct io_u {
+ struct timeval start_time;
+ struct timeval issue_time;
+
+ struct fio_file *file;
+ unsigned int flags;
+ enum fio_ddir ddir;
+
+ /*
+ * For replay workloads, we may want to account as a different
+ * IO type than what is being submitted.
+ */
+ enum fio_ddir acct_ddir;
+
+ /*
+ * Write generation
+ */
+ unsigned short numberio;
+
+ /*
+ * Allocated/set buffer and length
+ */
+ unsigned long buflen;
+ unsigned long long offset;
+ void *buf;
+
+ /*
+ * Initial seed for generating the buffer contents
+ */
+ uint64_t rand_seed;
+
+ /*
+ * IO engine state, may be different from above when we get
+ * partial transfers / residual data counts
+ */
+ void *xfer_buf;
+ unsigned long xfer_buflen;
+
+ /*
+ * Parameter related to pre-filled buffers and
+ * their size to handle variable block sizes.
+ */
+ unsigned long buf_filled_len;
+
+ struct io_piece *ipo;
+
+ unsigned int resid;
+ unsigned int error;
+
+ /*
+ * io engine private data
+ */
+ union {
+ unsigned int index;
+ unsigned int seen;
+ void *engine_data;
+ };
+
+ union {
+ struct flist_head verify_list;
+ struct workqueue_work work;
+ };
+
+ /*
+ * Callback for io completion
+ */
+ int (*end_io)(struct thread_data *, struct io_u **);
+
+ union {
+#ifdef CONFIG_LIBAIO
+ struct iocb iocb;
+#endif
+#ifdef CONFIG_POSIXAIO
+ os_aiocb_t aiocb;
+#endif
+#ifdef FIO_HAVE_SGIO
+ struct sg_io_hdr hdr;
+#endif
+#ifdef CONFIG_GUASI
+ guasi_req_t greq;
+#endif
+#ifdef CONFIG_SOLARISAIO
+ aio_result_t resultp;
+#endif
+#ifdef FIO_HAVE_BINJECT
+ struct b_user_cmd buc;
+#endif
+#ifdef CONFIG_RDMA
+ struct ibv_mr *mr;
+#endif
+ void *mmap_data;
+ };
+};
+
+/*
+ * io unit handling
+ */
+extern struct io_u *__get_io_u(struct thread_data *);
+extern struct io_u *get_io_u(struct thread_data *);
+extern void put_io_u(struct thread_data *, struct io_u *);
+extern void clear_io_u(struct thread_data *, struct io_u *);
+extern void requeue_io_u(struct thread_data *, struct io_u **);
+extern int __must_check io_u_sync_complete(struct thread_data *, struct io_u *);
+extern int __must_check io_u_queued_complete(struct thread_data *, int);
+extern void io_u_queued(struct thread_data *, struct io_u *);
+extern int io_u_quiesce(struct thread_data *);
+extern void io_u_log_error(struct thread_data *, struct io_u *);
+extern void io_u_mark_depth(struct thread_data *, unsigned int);
+extern void fill_io_buffer(struct thread_data *, void *, unsigned int, unsigned int);
+extern void io_u_fill_buffer(struct thread_data *td, struct io_u *, unsigned int, unsigned int);
+void io_u_mark_complete(struct thread_data *, unsigned int);
+void io_u_mark_submit(struct thread_data *, unsigned int);
+bool queue_full(const struct thread_data *);
+
+int do_io_u_sync(const struct thread_data *, struct io_u *);
+int do_io_u_trim(const struct thread_data *, struct io_u *);
+
+#ifdef FIO_INC_DEBUG
+/*
+ * Debug helper: print an io_u's offset/length/direction (and file name,
+ * when a file is attached) to the FD_IO debug channel, prefixed with the
+ * caller-supplied tag 'p'. Compiles away to a no-op without FIO_INC_DEBUG.
+ */
+static inline void dprint_io_u(struct io_u *io_u, const char *p)
+{
+ struct fio_file *f = io_u->file;
+
+ dprint(FD_IO, "%s: io_u %p: off=%llu/len=%lu/ddir=%d", p, io_u,
+ (unsigned long long) io_u->offset,
+ io_u->buflen, io_u->ddir);
+ if (f)
+ dprint(FD_IO, "/%s", f->file_name);
+ dprint(FD_IO, "\n");
+}
+#else
+#define dprint_io_u(io_u, p)
+#endif
+
+/*
+ * Direction to account this io_u under: replayed workloads may want to
+ * account an io_u as a different I/O type than the one actually submitted,
+ * in which case acct_ddir is set; -1 means "account as submitted".
+ */
+static inline enum fio_ddir acct_ddir(struct io_u *io_u)
+{
+ if (io_u->acct_ddir != -1)
+ return io_u->acct_ddir;
+
+ return io_u->ddir;
+}
+
+#define io_u_clear(td, io_u, val) \
+ td_flags_clear((td), &(io_u->flags), (val))
+#define io_u_set(td, io_u, val) \
+ td_flags_set((td), &(io_u)->flags, (val))
+
+#endif
+++ /dev/null
-#ifndef FIO_IOENGINE_H
-#define FIO_IOENGINE_H
-
-#include "compiler/compiler.h"
-#include "os/os.h"
-#include "log.h"
-#include "io_ddir.h"
-#include "debug.h"
-#include "file.h"
-#include "workqueue.h"
-
-#ifdef CONFIG_LIBAIO
-#include <libaio.h>
-#endif
-#ifdef CONFIG_GUASI
-#include <guasi.h>
-#endif
-
-#define FIO_IOOPS_VERSION 23
-
-enum {
- IO_U_F_FREE = 1 << 0,
- IO_U_F_FLIGHT = 1 << 1,
- IO_U_F_NO_FILE_PUT = 1 << 2,
- IO_U_F_IN_CUR_DEPTH = 1 << 3,
- IO_U_F_BUSY_OK = 1 << 4,
- IO_U_F_TRIMMED = 1 << 5,
- IO_U_F_BARRIER = 1 << 6,
- IO_U_F_VER_LIST = 1 << 7,
-};
-
-/*
- * The io unit
- */
-struct io_u {
- struct timeval start_time;
- struct timeval issue_time;
-
- struct fio_file *file;
- unsigned int flags;
- enum fio_ddir ddir;
-
- /*
- * For replay workloads, we may want to account as a different
- * IO type than what is being submitted.
- */
- enum fio_ddir acct_ddir;
-
- /*
- * Write generation
- */
- unsigned short numberio;
-
- /*
- * Allocated/set buffer and length
- */
- unsigned long buflen;
- unsigned long long offset;
- void *buf;
-
- /*
- * Initial seed for generating the buffer contents
- */
- uint64_t rand_seed;
-
- /*
- * IO engine state, may be different from above when we get
- * partial transfers / residual data counts
- */
- void *xfer_buf;
- unsigned long xfer_buflen;
-
- /*
- * Parameter related to pre-filled buffers and
- * their size to handle variable block sizes.
- */
- unsigned long buf_filled_len;
-
- struct io_piece *ipo;
-
- unsigned int resid;
- unsigned int error;
-
- /*
- * io engine private data
- */
- union {
- unsigned int index;
- unsigned int seen;
- void *engine_data;
- };
-
- union {
- struct flist_head verify_list;
- struct workqueue_work work;
- };
-
- /*
- * Callback for io completion
- */
- int (*end_io)(struct thread_data *, struct io_u **);
-
- union {
-#ifdef CONFIG_LIBAIO
- struct iocb iocb;
-#endif
-#ifdef CONFIG_POSIXAIO
- os_aiocb_t aiocb;
-#endif
-#ifdef FIO_HAVE_SGIO
- struct sg_io_hdr hdr;
-#endif
-#ifdef CONFIG_GUASI
- guasi_req_t greq;
-#endif
-#ifdef CONFIG_SOLARISAIO
- aio_result_t resultp;
-#endif
-#ifdef FIO_HAVE_BINJECT
- struct b_user_cmd buc;
-#endif
-#ifdef CONFIG_RDMA
- struct ibv_mr *mr;
-#endif
- void *mmap_data;
- };
-};
-
-/*
- * io_ops->queue() return values
- */
-enum {
- FIO_Q_COMPLETED = 0, /* completed sync */
- FIO_Q_QUEUED = 1, /* queued, will complete async */
- FIO_Q_BUSY = 2, /* no more room, call ->commit() */
-};
-
-struct ioengine_ops {
- struct flist_head list;
- const char *name;
- int version;
- int flags;
- int (*setup)(struct thread_data *);
- int (*init)(struct thread_data *);
- int (*prep)(struct thread_data *, struct io_u *);
- int (*queue)(struct thread_data *, struct io_u *);
- int (*commit)(struct thread_data *);
- int (*getevents)(struct thread_data *, unsigned int, unsigned int, const struct timespec *);
- struct io_u *(*event)(struct thread_data *, int);
- char *(*errdetails)(struct io_u *);
- int (*cancel)(struct thread_data *, struct io_u *);
- void (*cleanup)(struct thread_data *);
- int (*open_file)(struct thread_data *, struct fio_file *);
- int (*close_file)(struct thread_data *, struct fio_file *);
- int (*invalidate)(struct thread_data *, struct fio_file *);
- int (*unlink_file)(struct thread_data *, struct fio_file *);
- int (*get_file_size)(struct thread_data *, struct fio_file *);
- void (*terminate)(struct thread_data *);
- int (*iomem_alloc)(struct thread_data *, size_t);
- void (*iomem_free)(struct thread_data *);
- int (*io_u_init)(struct thread_data *, struct io_u *);
- void (*io_u_free)(struct thread_data *, struct io_u *);
- int option_struct_size;
- struct fio_option *options;
-};
-
-enum fio_ioengine_flags {
- FIO_SYNCIO = 1 << 0, /* io engine has synchronous ->queue */
- FIO_RAWIO = 1 << 1, /* some sort of direct/raw io */
- FIO_DISKLESSIO = 1 << 2, /* no disk involved */
- FIO_NOEXTEND = 1 << 3, /* engine can't extend file */
- FIO_NODISKUTIL = 1 << 4, /* diskutil can't handle filename */
- FIO_UNIDIR = 1 << 5, /* engine is uni-directional */
- FIO_NOIO = 1 << 6, /* thread does only pseudo IO */
- FIO_PIPEIO = 1 << 7, /* input/output no seekable */
- FIO_BARRIER = 1 << 8, /* engine supports barriers */
- FIO_MEMALIGN = 1 << 9, /* engine wants aligned memory */
- FIO_BIT_BASED = 1 << 10, /* engine uses a bit base (e.g. uses Kbit as opposed to KB) */
- FIO_FAKEIO = 1 << 11, /* engine pretends to do IO */
-};
-
-/*
- * External engine defined symbol to fill in the engine ops structure
- */
-typedef void (*get_ioengine_t)(struct ioengine_ops **);
-
-/*
- * io engine entry points
- */
-extern int __must_check td_io_init(struct thread_data *);
-extern int __must_check td_io_prep(struct thread_data *, struct io_u *);
-extern int __must_check td_io_queue(struct thread_data *, struct io_u *);
-extern int __must_check td_io_sync(struct thread_data *, struct fio_file *);
-extern int __must_check td_io_getevents(struct thread_data *, unsigned int, unsigned int, const struct timespec *);
-extern int __must_check td_io_commit(struct thread_data *);
-extern int __must_check td_io_open_file(struct thread_data *, struct fio_file *);
-extern int td_io_close_file(struct thread_data *, struct fio_file *);
-extern int td_io_unlink_file(struct thread_data *, struct fio_file *);
-extern int __must_check td_io_get_file_size(struct thread_data *, struct fio_file *);
-
-extern struct ioengine_ops *load_ioengine(struct thread_data *, const char *);
-extern void register_ioengine(struct ioengine_ops *);
-extern void unregister_ioengine(struct ioengine_ops *);
-extern void free_ioengine(struct thread_data *);
-extern void close_ioengine(struct thread_data *);
-
-extern int fio_show_ioengine_help(const char *engine);
-
-/*
- * io unit handling
- */
-extern struct io_u *__get_io_u(struct thread_data *);
-extern struct io_u *get_io_u(struct thread_data *);
-extern void put_io_u(struct thread_data *, struct io_u *);
-extern void clear_io_u(struct thread_data *, struct io_u *);
-extern void requeue_io_u(struct thread_data *, struct io_u **);
-extern int __must_check io_u_sync_complete(struct thread_data *, struct io_u *);
-extern int __must_check io_u_queued_complete(struct thread_data *, int);
-extern void io_u_queued(struct thread_data *, struct io_u *);
-extern int io_u_quiesce(struct thread_data *);
-extern void io_u_log_error(struct thread_data *, struct io_u *);
-extern void io_u_mark_depth(struct thread_data *, unsigned int);
-extern void fill_io_buffer(struct thread_data *, void *, unsigned int, unsigned int);
-extern void io_u_fill_buffer(struct thread_data *td, struct io_u *, unsigned int, unsigned int);
-void io_u_mark_complete(struct thread_data *, unsigned int);
-void io_u_mark_submit(struct thread_data *, unsigned int);
-bool queue_full(const struct thread_data *);
-
-int do_io_u_sync(const struct thread_data *, struct io_u *);
-int do_io_u_trim(const struct thread_data *, struct io_u *);
-
-#ifdef FIO_INC_DEBUG
-static inline void dprint_io_u(struct io_u *io_u, const char *p)
-{
- struct fio_file *f = io_u->file;
-
- dprint(FD_IO, "%s: io_u %p: off=%llu/len=%lu/ddir=%d", p, io_u,
- (unsigned long long) io_u->offset,
- io_u->buflen, io_u->ddir);
- if (f)
- dprint(FD_IO, "/%s", f->file_name);
- dprint(FD_IO, "\n");
-}
-#else
-#define dprint_io_u(io_u, p)
-#endif
-
-static inline enum fio_ddir acct_ddir(struct io_u *io_u)
-{
- if (io_u->acct_ddir != -1)
- return io_u->acct_ddir;
-
- return io_u->ddir;
-}
-
-#define io_u_clear(td, io_u, val) \
- td_flags_clear((td), &(io_u->flags), (val))
-#define io_u_set(td, io_u, val) \
- td_flags_set((td), &(io_u)->flags, (val))
-
-#endif
if (td->io_ops->init) {
ret = td->io_ops->init(td);
- if (ret && td->o.iodepth > 1) {
- log_err("fio: io engine init failed. Perhaps try"
- " reducing io depth?\n");
- }
+ if (ret)
+ log_err("fio: io engine %s init failed.%s\n",
+ td->io_ops->name,
+ td->o.iodepth > 1 ?
+ " Perhaps try reducing io depth?" : "");
+ else
+ td->io_ops_init = 1;
if (!td->error)
td->error = ret;
}
- if (!ret && td_ioengine_flagged(td, FIO_NOIO))
- td->flags |= TD_F_NOIO;
-
return ret;
}
return td->io_ops->get_file_size(td, f);
}
-static int do_sync_file_range(const struct thread_data *td,
- struct fio_file *f)
-{
- off64_t offset, nbytes;
-
- offset = f->first_write;
- nbytes = f->last_write - f->first_write;
-
- if (!nbytes)
- return 0;
-
- return sync_file_range(f->fd, offset, nbytes, td->o.sync_file_range);
-}
-
-int do_io_u_sync(const struct thread_data *td, struct io_u *io_u)
-{
- int ret;
-
- if (io_u->ddir == DDIR_SYNC) {
- ret = fsync(io_u->file->fd);
- } else if (io_u->ddir == DDIR_DATASYNC) {
-#ifdef CONFIG_FDATASYNC
- ret = fdatasync(io_u->file->fd);
-#else
- ret = io_u->xfer_buflen;
- io_u->error = EINVAL;
-#endif
- } else if (io_u->ddir == DDIR_SYNC_FILE_RANGE)
- ret = do_sync_file_range(td, io_u->file);
- else {
- ret = io_u->xfer_buflen;
- io_u->error = EINVAL;
- }
-
- if (ret < 0)
- io_u->error = errno;
-
- return ret;
-}
-
-int do_io_u_trim(const struct thread_data *td, struct io_u *io_u)
-{
-#ifndef FIO_HAVE_TRIM
- io_u->error = EINVAL;
- return 0;
-#else
- struct fio_file *f = io_u->file;
- int ret;
-
- ret = os_trim(f->fd, io_u->offset, io_u->xfer_buflen);
- if (!ret)
- return io_u->xfer_buflen;
-
- io_u->error = ret;
- return 0;
-#endif
-}
-
int fio_show_ioengine_help(const char *engine)
{
struct flist_head *entry;
--- /dev/null
+#ifndef FIO_IOENGINE_H
+#define FIO_IOENGINE_H
+
+#include "compiler/compiler.h"
+#include "os/os.h"
+#include "file.h"
+#include "io_u.h"
+
+#define FIO_IOOPS_VERSION 23
+
+/*
+ * io_ops->queue() return values
+ */
+enum {
+ FIO_Q_COMPLETED = 0, /* completed sync */
+ FIO_Q_QUEUED = 1, /* queued, will complete async */
+ FIO_Q_BUSY = 2, /* no more room, call ->commit() */
+};
+
+struct ioengine_ops {
+ struct flist_head list;
+ const char *name;
+ int version;
+ int flags;
+ int (*setup)(struct thread_data *);
+ int (*init)(struct thread_data *);
+ int (*prep)(struct thread_data *, struct io_u *);
+ int (*queue)(struct thread_data *, struct io_u *);
+ int (*commit)(struct thread_data *);
+ int (*getevents)(struct thread_data *, unsigned int, unsigned int, const struct timespec *);
+ struct io_u *(*event)(struct thread_data *, int);
+ char *(*errdetails)(struct io_u *);
+ int (*cancel)(struct thread_data *, struct io_u *);
+ void (*cleanup)(struct thread_data *);
+ int (*open_file)(struct thread_data *, struct fio_file *);
+ int (*close_file)(struct thread_data *, struct fio_file *);
+ int (*invalidate)(struct thread_data *, struct fio_file *);
+ int (*unlink_file)(struct thread_data *, struct fio_file *);
+ int (*get_file_size)(struct thread_data *, struct fio_file *);
+ void (*terminate)(struct thread_data *);
+ int (*iomem_alloc)(struct thread_data *, size_t);
+ void (*iomem_free)(struct thread_data *);
+ int (*io_u_init)(struct thread_data *, struct io_u *);
+ void (*io_u_free)(struct thread_data *, struct io_u *);
+ int option_struct_size;
+ struct fio_option *options;
+};
+
+enum fio_ioengine_flags {
+ FIO_SYNCIO = 1 << 0, /* io engine has synchronous ->queue */
+ FIO_RAWIO = 1 << 1, /* some sort of direct/raw io */
+ FIO_DISKLESSIO = 1 << 2, /* no disk involved */
+ FIO_NOEXTEND = 1 << 3, /* engine can't extend file */
+ FIO_NODISKUTIL = 1 << 4, /* diskutil can't handle filename */
+ FIO_UNIDIR = 1 << 5, /* engine is uni-directional */
+ FIO_NOIO = 1 << 6, /* thread does only pseudo IO */
+ FIO_PIPEIO = 1 << 7, /* input/output no seekable */
+ FIO_BARRIER = 1 << 8, /* engine supports barriers */
+ FIO_MEMALIGN = 1 << 9, /* engine wants aligned memory */
+ FIO_BIT_BASED = 1 << 10, /* engine uses a bit base (e.g. uses Kbit as opposed to KB) */
+ FIO_FAKEIO = 1 << 11, /* engine pretends to do IO */
+};
+
+/*
+ * External engine defined symbol to fill in the engine ops structure
+ */
+typedef void (*get_ioengine_t)(struct ioengine_ops **);
+
+/*
+ * io engine entry points
+ */
+extern int __must_check td_io_init(struct thread_data *);
+extern int __must_check td_io_prep(struct thread_data *, struct io_u *);
+extern int __must_check td_io_queue(struct thread_data *, struct io_u *);
+extern int __must_check td_io_getevents(struct thread_data *, unsigned int, unsigned int, const struct timespec *);
+extern int __must_check td_io_commit(struct thread_data *);
+extern int __must_check td_io_open_file(struct thread_data *, struct fio_file *);
+extern int td_io_close_file(struct thread_data *, struct fio_file *);
+extern int td_io_unlink_file(struct thread_data *, struct fio_file *);
+extern int __must_check td_io_get_file_size(struct thread_data *, struct fio_file *);
+
+extern struct ioengine_ops *load_ioengine(struct thread_data *, const char *);
+extern void register_ioengine(struct ioengine_ops *);
+extern void unregister_ioengine(struct ioengine_ops *);
+extern void free_ioengine(struct thread_data *);
+extern void close_ioengine(struct thread_data *);
+
+extern int fio_show_ioengine_help(const char *engine);
+
+#endif
sfree(log);
}
-inline unsigned long hist_sum(int j, int stride, unsigned int *io_u_plat,
+unsigned long hist_sum(int j, int stride, unsigned int *io_u_plat,
unsigned int *io_u_plat_last)
{
unsigned long sum;
#include "lib/rbtree.h"
#include "lib/ieee754.h"
#include "flist.h"
-#include "ioengine.h"
+#include "ioengines.h"
/*
* Use for maintaining statistics
#include <inttypes.h>
#include "memalign.h"
-#include "../fio.h"
+
+#define PTR_ALIGN(ptr, mask) \
+ (char *)((uintptr_t)((ptr) + (mask)) & ~(mask))
struct align_footer {
unsigned int offset;
#ifdef CONFIG_GETMNTENT
#include <mntent.h>
-#include "lib/mountcheck.h"
+#include "mountcheck.h"
#define MTAB "/etc/mtab"
#include <stdio.h>
#include <string.h>
-#include "../fio.h"
+#include "../compiler/compiler.h"
+#include "num2str.h"
+
+#define ARRAY_SIZE(x) (sizeof((x)) / (sizeof((x)[0])))
/**
* num2str() - Cheesy number->string conversion, complete with carry rounding error.
* @num: quantity (e.g., number of blocks, bytes or bits)
- * @maxlen: max number of digits in the output string (not counting prefix and units)
+ * @maxlen: max number of digits in the output string (not counting prefix and units, but counting .)
* @base: multiplier for num (e.g., if num represents Ki, use 1024)
* @pow2: select unit prefix - 0=power-of-10 decimal SI, nonzero=power-of-2 binary IEC
- * @units: select units - N2S_* macros defined in fio.h
+ * @units: select units - N2S_* macros defined in num2str.h
* @returns a malloc'd buffer containing "number[<unit prefix>][<units>]"
*/
char *num2str(uint64_t num, int maxlen, int base, int pow2, int units)
const char **unitprefix;
const char *unitstr[] = { "", "/s", "B", "bit", "B/s", "bit/s" };
const unsigned int thousand[] = { 1000, 1024 };
- unsigned int modulo, decimals;
+ unsigned int modulo;
int unit_index = 0, post_index, carry = 0;
- char tmp[32];
+ char tmp[32], fmt[32];
char *buf;
compiletime_assert(sizeof(sistr) == sizeof(iecstr), "unit prefix arrays must be identical sizes");
break;
}
+ /*
+ * Divide by K/Ki until string length of num <= maxlen.
+ */
modulo = -1U;
while (post_index < sizeof(sistr)) {
sprintf(tmp, "%llu", (unsigned long long) num);
post_index++;
}
+ /*
+ * If no modulo, then we're done.
+ */
if (modulo == -1U) {
done:
if (post_index >= ARRAY_SIZE(sistr))
return buf;
}
+ /*
+ * If no room for decimals, then we're done.
+ */
sprintf(tmp, "%llu", (unsigned long long) num);
- decimals = maxlen - strlen(tmp);
- if (decimals <= 1) {
+ if ((int)(maxlen - strlen(tmp)) <= 1) {
if (carry)
num++;
goto done;
}
- do {
- sprintf(tmp, "%u", modulo);
- if (strlen(tmp) <= decimals - 1)
- break;
-
- modulo = (modulo + 9) / 10;
- } while (1);
+ /*
+ * Fill in everything and return the result.
+ */
+ assert(maxlen - strlen(tmp) - 1 > 0);
+ assert(modulo < thousand[!!pow2]);
+ sprintf(fmt, "%%.%df", (int)(maxlen - strlen(tmp) - 1));
+ sprintf(tmp, fmt, (double)modulo / (double)thousand[!!pow2]);
- sprintf(buf, "%llu.%u%s%s", (unsigned long long) num, modulo,
+ sprintf(buf, "%llu.%s%s%s", (unsigned long long) num, &tmp[2],
unitprefix[post_index], unitstr[unit_index]);
return buf;
}
--- /dev/null
+#ifndef FIO_NUM2STR_H
+#define FIO_NUM2STR_H
+
+#include <inttypes.h>
+
+#define N2S_NONE 0
+#define N2S_BITPERSEC 1 /* match unit_base for bit rates */
+#define N2S_PERSEC 2
+#define N2S_BIT 3
+#define N2S_BYTE 4
+#define N2S_BYTEPERSEC 8 /* match unit_base for byte rates */
+
+extern char *num2str(uint64_t, int, int, int, int);
+
+#endif
-#include "fio.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <errno.h>
+#include <assert.h>
+
#include "strntol.h"
#include "pattern.h"
+#include "../minmax.h"
#include "../oslib/strcasestr.h"
/**
#define FIO_POW2_H
#include <inttypes.h>
+#include "types.h"
-static inline int is_power_of_2(uint64_t val)
+static inline bool is_power_of_2(uint64_t val)
{
return (val != 0 && ((val & (val - 1)) == 0));
}
#include <stdlib.h>
#include <limits.h>
-#include "../fio.h"
+
+#include "../compiler/compiler.h"
#include "prio_tree.h"
+#define ARRAY_SIZE(x) (sizeof((x)) / (sizeof((x)[0])))
+
/*
* A clever mix of heap and radix trees forms a radix priority search tree (PST)
* which is useful for storing intervals, e.g, we can consider a vma as a closed
#include <string.h>
#include <assert.h>
#include "rand.h"
-#include "lib/pattern.h"
+#include "pattern.h"
#include "../hash.h"
int arch_random;
--- /dev/null
+#ifndef FIO_SEQLOCK_H
+#define FIO_SEQLOCK_H
+
+#include "../arch/arch.h"
+
+struct seqlock {
+ volatile int sequence;
+};
+
+static inline void seqlock_init(struct seqlock *s)
+{
+ s->sequence = 0;
+}
+
+static inline unsigned int read_seqlock_begin(struct seqlock *s)
+{
+ unsigned int seq;
+
+ do {
+ seq = s->sequence;
+ if (!(seq & 1))
+ break;
+ nop;
+ } while (1);
+
+ read_barrier();
+ return seq;
+}
+
+static inline bool read_seqlock_retry(struct seqlock *s, unsigned int seq)
+{
+ read_barrier();
+ return s->sequence != seq;
+}
+
+static inline void write_seqlock_begin(struct seqlock *s)
+{
+ s->sequence++;
+ write_barrier();
+}
+
+static inline void write_seqlock_end(struct seqlock *s)
+{
+ write_barrier();
+ s->sequence++;
+}
+
+#endif
#include <stdlib.h>
#include <limits.h>
-#include "lib/strntol.h"
+#include "strntol.h"
long strntol(const char *str, size_t sz, char **end, int base)
{
#include <sys/types.h>
#include <fcntl.h>
#include "ieee754.h"
-#include "../log.h"
#include "zipf.h"
#include "../minmax.h"
#include "../hash.h"
int nr_io_threads = 0;
for_each_td(td, i) {
- if (td->flags & TD_F_NOIO)
+ if (td->io_ops_init && td_ioengine_flagged(td, FIO_NOIO))
continue;
nr_io_threads++;
if (td->runstate < TD_EXITED)
free(td->orig_buffer);
}
+static int alloc_mem_cudamalloc(struct thread_data *td, size_t total_mem)
+{
+#ifdef CONFIG_CUDA
+ CUresult ret;
+ char name[128];
+
+ ret = cuInit(0);
+ if (ret != CUDA_SUCCESS) {
+ log_err("fio: failed to initialize cuda driver api\n");
+ return 1;
+ }
+
+ ret = cuDeviceGetCount(&td->gpu_dev_cnt);
+ if (ret != CUDA_SUCCESS) {
+ log_err("fio: failed to get device count\n");
+ return 1;
+ }
+ dprint(FD_MEM, "found %d GPU devices\n", td->gpu_dev_cnt);
+
+ if (td->gpu_dev_cnt == 0) {
+ log_err("fio: no GPU device found. "
+ "Cannot perform GPUDirect RDMA.\n");
+ return 1;
+ }
+
+ td->gpu_dev_id = td->o.gpu_dev_id;
+ ret = cuDeviceGet(&td->cu_dev, td->gpu_dev_id);
+ if (ret != CUDA_SUCCESS) {
+ log_err("fio: failed to get GPU device\n");
+ return 1;
+ }
+
+ ret = cuDeviceGetName(name, sizeof(name), td->gpu_dev_id);
+ if (ret != CUDA_SUCCESS) {
+ log_err("fio: failed to get device name\n");
+ return 1;
+ }
+ dprint(FD_MEM, "dev_id = [%d], device name = [%s]\n", \
+ td->gpu_dev_id, name);
+
+ ret = cuCtxCreate(&td->cu_ctx, CU_CTX_MAP_HOST, td->cu_dev);
+ if (ret != CUDA_SUCCESS) {
+ log_err("fio: failed to create cuda context: %d\n", ret);
+ return 1;
+ }
+
+ ret = cuMemAlloc(&td->dev_mem_ptr, total_mem);
+ if (ret != CUDA_SUCCESS) {
+ log_err("fio: cuMemAlloc %zu bytes failed\n", total_mem);
+ return 1;
+ }
+ td->orig_buffer = (void *) td->dev_mem_ptr;
+
+ dprint(FD_MEM, "cudaMalloc %llu %p\n", \
+ (unsigned long long) total_mem, td->orig_buffer);
+ return 0;
+#else
+ return -EINVAL;
+#endif
+}
+
+static void free_mem_cudamalloc(struct thread_data *td)
+{
+#ifdef CONFIG_CUDA
+ if (td->dev_mem_ptr != NULL)
+ cuMemFree(td->dev_mem_ptr);
+
+ if (cuCtxDestroy(td->cu_ctx) != CUDA_SUCCESS)
+ log_err("fio: failed to destroy cuda context\n");
+#endif
+}
+
/*
* Set up the buffer area we need for io.
*/
else if (td->o.mem_type == MEM_MMAP || td->o.mem_type == MEM_MMAPHUGE ||
td->o.mem_type == MEM_MMAPSHARED)
ret = alloc_mem_mmap(td, total_mem);
+ else if (td->o.mem_type == MEM_CUDA_MALLOC)
+ ret = alloc_mem_cudamalloc(td, total_mem);
else {
log_err("fio: bad mem type: %d\n", td->o.mem_type);
ret = 1;
else if (td->o.mem_type == MEM_MMAP || td->o.mem_type == MEM_MMAPHUGE ||
td->o.mem_type == MEM_MMAPSHARED)
free_mem_mmap(td, total_mem);
+ else if (td->o.mem_type == MEM_CUDA_MALLOC)
+ free_mem_cudamalloc(td);
else
log_err("Bad memory type %u\n", td->o.mem_type);
.oval = MEM_MMAPHUGE,
.help = "Like mmap, but use huge pages",
},
+#endif
+#ifdef CONFIG_CUDA
+ { .ival = "cudamalloc",
+ .oval = MEM_CUDA_MALLOC,
+ .help = "Allocate GPU device memory for GPUDirect RDMA",
+ },
#endif
},
},
.type = FIO_OPT_UNSUPPORTED,
.help = "Build fio with libnuma-dev(el) to enable this option",
},
+#endif
+#ifdef CONFIG_CUDA
+ {
+ .name = "gpu_dev_id",
+ .lname = "GPU device ID",
+ .type = FIO_OPT_INT,
+ .off1 = offsetof(struct thread_options, gpu_dev_id),
+ .help = "Set GPU device ID for GPUDirect RDMA",
+ .def = "0",
+ .category = FIO_OPT_C_GENERAL,
+ .group = FIO_OPT_G_INVALID,
+ },
#endif
{
.name = "end_fsync",
#include "binject.h"
#include "../file.h"
+#ifndef __has_builtin // Optional of course.
+ #define __has_builtin(x) 0 // Compatibility with non-clang compilers.
+#endif
+
#define FIO_HAVE_DISK_UTIL
#define FIO_HAVE_IOSCHED_SWITCH
#define FIO_HAVE_IOPRIO
#define MAP_HUGETLB 0x40000 /* arch specific */
#endif
-
+#ifndef CONFIG_NO_SHM
/*
* The Android NDK doesn't currently export <sys/shm.h>, so define the
* necessary stuff here.
*/
-#include <linux/shm.h>
+#include <sys/shm.h>
#define SHM_HUGETLB 04000
#include <stdio.h>
#include <linux/ashmem.h>
-#include <sys/mman.h>
#define ASHMEM_DEVICE "/dev/ashmem"
static inline int shmget (key_t __key, size_t __size, int __shmflg)
{
int fd,ret;
- char key[11];
-
+ char keybuf[11];
+
fd = open(ASHMEM_DEVICE, O_RDWR);
if (fd < 0)
return fd;
- sprintf(key,"%d",__key);
- ret = ioctl(fd, ASHMEM_SET_NAME, key);
+ sprintf(keybuf,"%d",__key);
+ ret = ioctl(fd, ASHMEM_SET_NAME, keybuf);
if (ret < 0)
goto error;
goto error;
return fd;
-
+
error:
close(fd);
return ret;
size = *ptr; //find mmap size which we stored at the beginning of the buffer
return munmap((void *)ptr, size + sizeof(size_t));
}
+#endif
#define SPLICE_DEF_SIZE (64*1024)
#define FIO_O_NOATIME 0
#endif
-#define fio_swap16(x) __bswap_16(x)
-#define fio_swap32(x) __bswap_32(x)
-#define fio_swap64(x) __bswap_64(x)
+/* Check for GCC or Clang byte swap intrinsics */
+#if (__has_builtin(__builtin_bswap16) && __has_builtin(__builtin_bswap32) \
+ && __has_builtin(__builtin_bswap64)) || (__GNUC__ > 4 \
+ || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) /* fio_swapN */
+#define fio_swap16(x) __builtin_bswap16(x)
+#define fio_swap32(x) __builtin_bswap32(x)
+#define fio_swap64(x) __builtin_bswap64(x)
+#else
+#include <byteswap.h>
+#define fio_swap16(x) bswap_16(x)
+#define fio_swap32(x) bswap_32(x)
+#define fio_swap64(x) bswap_64(x)
+#endif /* fio_swapN */
#define CACHE_LINE_FILE \
"/sys/devices/system/cpu/cpu0/cache/index0/coherency_line_size"
#include <linux/unistd.h>
#include <linux/raw.h>
#include <linux/major.h>
-#include <byteswap.h>
#include "./os-linux-syscall.h"
#include "binject.h"
#include "../file.h"
+#ifndef __has_builtin // Optional of course.
+ #define __has_builtin(x) 0 // Compatibility with non-clang compilers.
+#endif
+
#define FIO_HAVE_CPU_AFFINITY
#define FIO_HAVE_DISK_UTIL
#define FIO_HAVE_SGIO
#define FIO_MADV_FREE MADV_REMOVE
#endif
-#if defined(__builtin_bswap16)
+/* Check for GCC or Clang byte swap intrinsics */
+#if (__has_builtin(__builtin_bswap16) && __has_builtin(__builtin_bswap32) \
+ && __has_builtin(__builtin_bswap64)) || (__GNUC__ > 4 \
+ || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) /* fio_swapN */
#define fio_swap16(x) __builtin_bswap16(x)
-#else
-#define fio_swap16(x) __bswap_16(x)
-#endif
-#if defined(__builtin_bswap32)
#define fio_swap32(x) __builtin_bswap32(x)
-#else
-#define fio_swap32(x) __bswap_32(x)
-#endif
-#if defined(__builtin_bswap64)
#define fio_swap64(x) __builtin_bswap64(x)
#else
-#define fio_swap64(x) __bswap_64(x)
-#endif
+#include <byteswap.h>
+#define fio_swap16(x) bswap_16(x)
+#define fio_swap32(x) bswap_32(x)
+#define fio_swap64(x) bswap_64(x)
+#endif /* fio_swapN */
#define CACHE_LINE_FILE \
"/sys/devices/system/cpu/cpu0/cache/index0/coherency_line_size"
#define FIO_HAVE_FS_STAT
#define FIO_HAVE_GETTID
-#undef FIO_HAVE_CPU_AFFINITY /* XXX notyet */
+#undef FIO_HAVE_CPU_AFFINITY /* doesn't exist */
#define OS_MAP_ANON MAP_ANON
#include <sys/ioctl.h>
#include <sys/dkio.h>
#include <sys/disklabel.h>
+#include <sys/utsname.h>
/* XXX hack to avoid conflicts between rbtree.h and <sys/tree.h> */
#include <sys/sysctl.h>
#undef RB_BLACK
#define FIO_HAVE_GETTID
#define FIO_HAVE_SHM_ATTACH_REMOVED
-#undef FIO_HAVE_CPU_AFFINITY /* XXX notyet */
+#undef FIO_HAVE_CPU_AFFINITY /* doesn't exist */
#define OS_MAP_ANON MAP_ANON
static inline int gettid(void)
{
- return (int) pthread_self();
+ return (int)(intptr_t) pthread_self();
}
static inline unsigned long long get_fs_free_size(const char *path)
static inline int shm_attach_to_open_removed(void)
{
+ struct utsname uts;
+ int major, minor;
+
+ if (uname(&uts) == -1)
+ return 0;
+
/*
- * XXX: Return 1 if >= OpenBSD 5.1 according to 97900ebf.
+ * Return 1 if >= OpenBSD 5.1 according to 97900ebf,
+ * assuming both major/minor versions are < 10.
*/
+ if (uts.release[0] > '9' || uts.release[0] < '0')
+ return 0;
+ if (uts.release[1] != '.')
+ return 0;
+ if (uts.release[2] > '9' || uts.release[2] < '0')
+ return 0;
+
+ major = uts.release[0] - '0';
+ minor = uts.release[2] - '0';
+
+ if (major > 5)
+ return 1;
+ if (major == 5 && minor >= 1)
+ return 1;
+
return 0;
}
<Product Id="*"
Codepage="1252" Language="1033"
Manufacturer="fio" Name="fio"
- UpgradeCode="2338A332-5511-43CF-B9BD-5C60496CCFCC" Version="2.18">
+ UpgradeCode="2338A332-5511-43CF-B9BD-5C60496CCFCC" Version="2.19">
<Package
Description="Flexible IO Tester"
InstallerVersion="301" Keywords="Installer,MSI,Database"
#include <stdio.h>
#include <unistd.h>
-#include "../os/os.h"
-#include "oslib/linux-dev-lookup.h"
+#include "linux-dev-lookup.h"
int blktrace_lookup_device(const char *redirect, char *path, unsigned int maj,
unsigned int min)
#include <string.h>
-#include "oslib/strlcat.h"
+#include "strlcat.h"
size_t strlcat(char *dst, const char *src, size_t size)
{
const char *p = str;
char *c;
unsigned long long mult = 1;
+ int i;
/*
* Go forward until we hit a non-digit, or +/- sign
}
c = strdup(p);
- for (int i = 0; i < strlen(c); i++)
+ for (i = 0; i < strlen(c); i++)
c[i] = tolower(c[i]);
if (!strncmp("us", c, 2) || !strncmp("usec", c, 4))
*
*/
#include "fio.h"
-#include "ioengine.h"
+#include "ioengines.h"
#include "lib/getrusage.h"
#include "rate-submit.h"
struct flist_head next; /* Other sk_entry's, if linked command */
};
-struct sk_out {
- unsigned int refs; /* frees sk_out when it drops to zero.
- * protected by below ->lock */
-
- int sk; /* socket fd to talk to client */
- struct fio_mutex lock; /* protects ref and below list */
- struct flist_head list; /* list of pending transmit work */
- struct fio_mutex wait; /* wake backend when items added to list */
- struct fio_mutex xmit; /* held while sending data */
-};
-
static char *fio_server_arg;
static char *bind_sock;
static struct sockaddr_in saddr_in;
#define FIO_NET_PORT 8765
+struct sk_out {
+ unsigned int refs; /* frees sk_out when it drops to zero.
+ * protected by below ->lock */
+
+ int sk; /* socket fd to talk to client */
+ struct fio_mutex lock; /* protects ref and below list */
+ struct flist_head list; /* list of pending transmit work */
+ struct fio_mutex wait; /* wake backend when items added to list */
+ struct fio_mutex xmit; /* held while sending data */
+};
+
/*
* On-wire encoding is little endian
*/
};
enum {
- FIO_SERVER_VER = 60,
+ FIO_SERVER_VER = 61,
FIO_SERVER_MAX_FRAGMENT_PDU = 1024,
FIO_SERVER_MAX_CMD_MB = 2048,
td_io_u_unlock(td);
}
-static int add_bw_samples(struct thread_data *td, struct timeval *t)
+static int __add_samples(struct thread_data *td, struct timeval *parent_tv,
+ struct timeval *t, unsigned int avg_time,
+ uint64_t *this_io_bytes, uint64_t *stat_io_bytes,
+ struct io_stat *stat, struct io_log *log,
+ bool is_kb)
{
- struct thread_stat *ts = &td->ts;
unsigned long spent, rate;
enum fio_ddir ddir;
unsigned int next, next_log;
- next_log = td->o.bw_avg_time;
+ next_log = avg_time;
- spent = mtime_since(&td->bw_sample_time, t);
- if (spent < td->o.bw_avg_time &&
- td->o.bw_avg_time - spent >= LOG_MSEC_SLACK)
- return td->o.bw_avg_time - spent;
+ spent = mtime_since(parent_tv, t);
+ if (spent < avg_time && avg_time - spent >= LOG_MSEC_SLACK)
+ return avg_time - spent;
td_io_u_lock(td);
for (ddir = 0; ddir < DDIR_RWDIR_CNT; ddir++) {
uint64_t delta;
- delta = td->this_io_bytes[ddir] - td->stat_io_bytes[ddir];
+ delta = this_io_bytes[ddir] - stat_io_bytes[ddir];
if (!delta)
continue; /* No entries for interval */
- if (spent)
- rate = delta * 1000 / spent / 1024; /* KiB/s */
- else
+ if (spent) {
+ if (is_kb)
+ rate = delta * 1000 / spent / 1024; /* KiB/s */
+ else
+ rate = (delta * 1000) / spent;
+ } else
rate = 0;
- add_stat_sample(&ts->bw_stat[ddir], rate);
+ add_stat_sample(&stat[ddir], rate);
if (td->bw_log) {
unsigned int bs = 0;
if (td->o.min_bs[ddir] == td->o.max_bs[ddir])
bs = td->o.min_bs[ddir];
- next = add_log_sample(td, td->bw_log, sample_val(rate),
- ddir, bs, 0);
+ next = add_log_sample(td, log, sample_val(rate), ddir, bs, 0);
next_log = min(next_log, next);
}
- td->stat_io_bytes[ddir] = td->this_io_bytes[ddir];
+ stat_io_bytes[ddir] = this_io_bytes[ddir];
}
- timeval_add_msec(&td->bw_sample_time, td->o.bw_avg_time);
+ timeval_add_msec(parent_tv, avg_time);
td_io_u_unlock(td);
- if (spent <= td->o.bw_avg_time)
- return min(next_log, td->o.bw_avg_time);
+ if (spent <= avg_time)
+ next = avg_time;
+ else
+ next = avg_time - (1 + spent - avg_time);
- next = td->o.bw_avg_time - (1 + spent - td->o.bw_avg_time);
return min(next, next_log);
}
+static int add_bw_samples(struct thread_data *td, struct timeval *t)
+{
+ return __add_samples(td, &td->bw_sample_time, t, td->o.bw_avg_time,
+ td->this_io_bytes, td->stat_io_bytes,
+ td->ts.bw_stat, td->bw_log, true);
+}
+
void add_iops_sample(struct thread_data *td, struct io_u *io_u,
unsigned int bytes)
{
static int add_iops_samples(struct thread_data *td, struct timeval *t)
{
- struct thread_stat *ts = &td->ts;
- unsigned long spent, iops;
- enum fio_ddir ddir;
- unsigned int next, next_log;
-
- next_log = td->o.iops_avg_time;
-
- spent = mtime_since(&td->iops_sample_time, t);
- if (spent < td->o.iops_avg_time &&
- td->o.iops_avg_time - spent >= LOG_MSEC_SLACK)
- return td->o.iops_avg_time - spent;
-
- td_io_u_lock(td);
-
- /*
- * Compute both read and write rates for the interval.
- */
- for (ddir = 0; ddir < DDIR_RWDIR_CNT; ddir++) {
- uint64_t delta;
-
- delta = td->this_io_blocks[ddir] - td->stat_io_blocks[ddir];
- if (!delta)
- continue; /* No entries for interval */
-
- if (spent)
- iops = (delta * 1000) / spent;
- else
- iops = 0;
-
- add_stat_sample(&ts->iops_stat[ddir], iops);
-
- if (td->iops_log) {
- unsigned int bs = 0;
-
- if (td->o.min_bs[ddir] == td->o.max_bs[ddir])
- bs = td->o.min_bs[ddir];
-
- next = add_log_sample(td, td->iops_log,
- sample_val(iops), ddir, bs, 0);
- next_log = min(next_log, next);
- }
-
- td->stat_io_blocks[ddir] = td->this_io_blocks[ddir];
- }
-
- timeval_add_msec(&td->iops_sample_time, td->o.iops_avg_time);
-
- td_io_u_unlock(td);
-
- if (spent <= td->o.iops_avg_time)
- return min(next_log, td->o.iops_avg_time);
-
- next = td->o.iops_avg_time - (1 + spent - td->o.iops_avg_time);
- return min(next, next_log);
+ return __add_samples(td, &td->iops_sample_time, t, td->o.iops_avg_time,
+ td->this_io_blocks, td->stat_io_blocks,
+ td->ts.iops_stat, td->iops_log, false);
}
/*
#include <fcntl.h>
#include <string.h>
-#include "../lib/rbtree.h"
#include "../flist.h"
#include "../log.h"
#include "../mutex.h"
#include "../os/os.h"
#include "../gettime.h"
#include "../fio_time.h"
+#include "../lib/rbtree.h"
#include "../lib/bloom.h"
#include "debug.h"
MEM_MMAP, /* use anonynomous mmap */
MEM_MMAPHUGE, /* memory mapped huge file */
MEM_MMAPSHARED, /* use mmap with shared flag */
+ MEM_CUDA_MALLOC,/* use GPU memory */
};
#define ERROR_STR_MAX 128
unsigned short numa_mem_mode;
unsigned int numa_mem_prefer_node;
char *numa_memnodes;
+ unsigned int gpu_dev_id;
+
unsigned int iolog;
unsigned int rwmixcycle;
unsigned int rwmix[DDIR_RWDIR_CNT];
uint32_t iodepth_batch;
uint32_t iodepth_batch_complete_min;
uint32_t iodepth_batch_complete_max;
- uint32_t __proper_alignment_for_64b;
uint64_t size;
uint64_t io_size;
uint32_t bs_unaligned;
uint32_t fsync_on_close;
uint32_t bs_is_seq_rand;
- uint32_t pad1;
uint32_t random_distribution;
uint32_t exitall_error;
uint8_t verify_cpumask[FIO_TOP_STR_MAX];
uint8_t log_gz_cpumask[FIO_TOP_STR_MAX];
#endif
+ uint32_t gpu_dev_id;
+ uint32_t pad;
uint32_t cpus_allowed_policy;
uint32_t iolog;
uint32_t rwmixcycle;
void timeval_add_msec(struct timeval *tv, unsigned int msec)
{
- tv->tv_usec += 1000 * msec;
- if (tv->tv_usec >= 1000000) {
+ unsigned long adj_usec = 1000 * msec;
+
+ tv->tv_usec += adj_usec;
+ if (adj_usec >= 1000000) {
+ unsigned long adj_sec = adj_usec / 1000000;
+
+ tv->tv_usec -= adj_sec * 1000000;
+ tv->tv_sec += adj_sec;
+ }
+ if (tv->tv_usec >= 1000000){
tv->tv_usec -= 1000000;
tv->tv_sec++;
}