OBJS = gettime.o fio.o ioengines.o init.o stat.o log.o time.o filesetup.o \
eta.o verify.o memory.o io_u.o parse.o mutex.o options.o \
rbtree.o diskutil.o fifo.o blktrace.o smalloc.o filehash.o helpers.o \
- cgroup.o profile.o debug.o
+ cgroup.o profile.o debug.o trim.o
OBJS += lib/rand.o
OBJS += lib/flist_sort.o
all: depend $(PROGS) $(SCRIPTS)
clean:
- -rm -f .depend cscope.out $(OBJS) $(PROGS) core.* core
+ -rm -f .depend $(OBJS) $(PROGS) core.* core
cscope:
@cscope -b -R
struct io_piece *ipo;
ipo = calloc(1, sizeof(*ipo));
+ init_ipo(ipo);
ipo->ddir = DDIR_INVAL;
ipo->fileno = fileno;
{
struct io_piece *ipo = malloc(sizeof(*ipo));
- memset(ipo, 0, sizeof(*ipo));
- INIT_FLIST_HEAD(&ipo->list);
+ init_ipo(ipo);
+
/*
* the 512 is wrong here, it should be the hardware sector size...
*/
{
struct io_piece *ipo = malloc(sizeof(*ipo));
+ init_ipo(ipo);
trace_add_file(td, t->device);
ios[DDIR_WRITE]++;
INIT_FLIST_HEAD(&td->io_log_list);
INIT_FLIST_HEAD(&td->io_hist_list);
INIT_FLIST_HEAD(&td->verify_list);
+ INIT_FLIST_HEAD(&td->trim_list);
pthread_mutex_init(&td->io_u_lock, NULL);
td->io_hist_tree = RB_ROOT;
unsigned int io_u_complete[FIO_IO_U_MAP_NR];
unsigned int io_u_lat_u[FIO_IO_U_LAT_U_NR];
unsigned int io_u_lat_m[FIO_IO_U_LAT_M_NR];
- unsigned long total_io_u[2];
- unsigned long short_io_u[2];
+ unsigned long total_io_u[3];
+ unsigned long short_io_u[3];
unsigned long total_submit;
unsigned long total_complete;
unsigned int gtod_offload;
enum fio_cs clocksource;
unsigned int no_stall;
+ unsigned int trim_percentage;
+ unsigned int trim_batch;
+ unsigned int trim_zero;
+ unsigned long long trim_backlog;
char *read_iolog_file;
char *write_iolog_file;
char *sysfs_root;
- unsigned long rand_seeds[6];
+ unsigned long rand_seeds[7];
os_random_state_t bsrange_state;
os_random_state_t verify_state;
+ os_random_state_t trim_state;
unsigned int verify_batch;
+ unsigned int trim_batch;
int shm_id;
*/
struct flist_head io_log_list;
+ /*
+ * For tracking/handling discards
+ */
+ struct flist_head trim_list;
+ unsigned long trim_entries;
+
/*
* for fileservice, how often to switch to a new file
*/
os_random_seed(td->rand_seeds[3], &td->next_file_state);
os_random_seed(td->rand_seeds[5], &td->file_size_state);
+ os_random_seed(td->rand_seeds[6], &td->trim_state);
if (!td_random(td))
return;
#include "fio.h"
#include "hash.h"
#include "verify.h"
+#include "trim.h"
#include "lib/rand.h"
struct io_completion_data {
return io_u;
}
-/*
- * Return an io_u to be processed. Gets a buflen and offset, sets direction,
- * etc. The returned io_u is fully ready to be prepped and submitted.
- */
-struct io_u *get_io_u(struct thread_data *td)
+/*
+ * Check whether this io_u should be issued as a trim. Only considered when
+ * the trim_backlog option is set and td->trim_entries is non-zero.
+ * Returns 1 if get_next_trim() returned 0 for this io_u, otherwise 0.
+ */
+static int check_get_trim(struct thread_data *td, struct io_u *io_u)
{
- struct fio_file *f;
- struct io_u *io_u;
+ if (td->o.trim_backlog && td->trim_entries) {
+ int get_trim = 0;
- io_u = __get_io_u(td);
- if (!io_u) {
- dprint(FD_IO, "__get_io_u failed\n");
- return NULL;
+ /*
+ * Either continue a batch already in progress, or start a new
+ * one when io_hist_len is a multiple of trim_backlog and the
+ * last direction was not a read. A new batch is sized by the
+ * trim_batch option, falling back to trim_backlog when zero.
+ */
+ if (td->trim_batch) {
+ td->trim_batch--;
+ get_trim = 1;
+ } else if (!(td->io_hist_len % td->o.trim_backlog) &&
+ td->last_ddir != DDIR_READ) {
+ td->trim_batch = td->o.trim_batch;
+ if (!td->trim_batch)
+ td->trim_batch = td->o.trim_backlog;
+ get_trim = 1;
+ }
+
+ if (get_trim && !get_next_trim(td, io_u))
+ return 1;
}
+ return 0;
+}
+
+static int check_get_verify(struct thread_data *td, struct io_u *io_u)
+{
if (td->o.verify_backlog && td->io_hist_len) {
int get_verify = 0;
}
if (get_verify && !get_next_verify(td, io_u))
- goto out;
+ return 1;
}
+ return 0;
+}
+
+/*
+ * Return an io_u to be processed. Gets a buflen and offset, sets direction,
+ * etc. The returned io_u is fully ready to be prepped and submitted.
+ */
+struct io_u *get_io_u(struct thread_data *td)
+{
+ struct fio_file *f;
+ struct io_u *io_u;
+
+ io_u = __get_io_u(td);
+ if (!io_u) {
+ dprint(FD_IO, "__get_io_u failed\n");
+ return NULL;
+ }
+
+ if (check_get_verify(td, io_u))
+ goto out;
+ if (check_get_trim(td, io_u))
+ goto out;
+
/*
* from a requeue, io_u already setup
*/
io_u->xfer_buflen = io_u->buflen;
out:
+ assert(io_u->file);
if (!td_io_prep(td, io_u)) {
if (!td->o.disable_slat)
fio_gettime(&io_u->start_time, NULL);
IO_U_F_FREE_DEF = 1 << 2,
IO_U_F_IN_CUR_DEPTH = 1 << 3,
IO_U_F_BUSY_OK = 1 << 4,
+ IO_U_F_TRIMMED = 1 << 5,
};
/*
if (ddir_rw(io_u->ddir)) {
io_u_mark_depth(td, 1);
td->ts.total_io_u[io_u->ddir]++;
- }
+ } else if (io_u->ddir == DDIR_TRIM)
+ td->ts.total_io_u[2]++;
} else if (ret == FIO_Q_QUEUED) {
int r;
struct io_sample *log;
};
+/*
+ * io_piece flags
+ */
+enum {
+ IP_F_ONRB = 1,	/* set once the piece is inserted in td->io_hist_tree */
+ IP_F_ONLIST = 2,	/* set once the piece is added to td->io_hist_list */
+ IP_F_TRIMMED = 4,	/* set by get_next_trim(); get_next_verify() maps it to IO_U_F_TRIMMED */
+};
+
/*
* When logging io actions, this matches a single sent io_u
*/
struct rb_node rb_node;
struct flist_head list;
};
+ struct flist_head trim_list;
union {
int fileno;
struct fio_file *file;
};
unsigned long long offset;
unsigned long len;
+ unsigned long flags;
enum fio_ddir ddir;
union {
unsigned long delay;
extern int write_bw_log;
extern void add_agg_sample(unsigned long, enum fio_ddir, unsigned int);
+/*
+ * Zero an io_piece and initialise its trim_list head, so that the piece
+ * starts out with an empty trim_list linkage.
+ */
+static inline void init_ipo(struct io_piece *ipo)
+{
+ memset(ipo, 0, sizeof(*ipo));
+ INIT_FLIST_HEAD(&ipo->trim_list);
+}
+
#endif
#include "flist.h"
#include "fio.h"
#include "verify.h"
+#include "trim.h"
static const char iolog_ver2[] = "fio version 2 iolog";
ipo = flist_entry(td->io_log_list.next, struct io_piece, list);
flist_del(&ipo->list);
+ remove_trim_entry(td, ipo);
ret = ipo_special(td, ipo);
if (ret < 0) {
while ((n = rb_first(&td->io_hist_tree)) != NULL) {
ipo = rb_entry(n, struct io_piece, rb_node);
rb_erase(n, &td->io_hist_tree);
+ remove_trim_entry(td, ipo);
td->io_hist_len--;
free(ipo);
}
while (!flist_empty(&td->io_hist_list)) {
ipo = flist_entry(td->io_hist_list.next, struct io_piece, list);
flist_del(&ipo->list);
+ remove_trim_entry(td, ipo);
td->io_hist_len--;
free(ipo);
}
struct io_piece *ipo, *__ipo;
ipo = malloc(sizeof(struct io_piece));
+ init_ipo(ipo);
ipo->file = io_u->file;
ipo->offset = io_u->offset;
ipo->len = io_u->buflen;
+ if (io_u_should_trim(td, io_u)) {
+ flist_add_tail(&ipo->trim_list, &td->trim_list);
+ td->trim_entries++;
+ }
+
/*
* We don't need to sort the entries, if:
*
(file_randommap(td, ipo->file) || td->o.verify == VERIFY_NONE)) {
INIT_FLIST_HEAD(&ipo->list);
flist_add_tail(&ipo->list, &td->io_hist_list);
+ ipo->flags |= IP_F_ONLIST;
td->io_hist_len++;
return;
}
assert(ipo->len == __ipo->len);
td->io_hist_len--;
rb_erase(parent, &td->io_hist_tree);
+ remove_trim_entry(td, __ipo);
free(__ipo);
goto restart;
}
rb_link_node(&ipo->rb_node, parent, p);
rb_insert_color(&ipo->rb_node, &td->io_hist_tree);
+ ipo->flags |= IP_F_ONRB;
td->io_hist_len++;
}
* Make note of file
*/
ipo = malloc(sizeof(*ipo));
- memset(ipo, 0, sizeof(*ipo));
- INIT_FLIST_HEAD(&ipo->list);
+ init_ipo(ipo);
ipo->ddir = rw;
if (rw == DDIR_WAIT) {
ipo->delay = offset;
}
#endif
+#ifdef FIO_HAVE_TRIM
+/*
+ * Option callback used by "trim_percentage": data is the thread_data, and
+ * the parsed value is stored in its trim_percentage option. Always returns 0.
+ */
+static int str_verify_trim_cb(void *data, unsigned long long *val)
+{
+ struct thread_data *td = data;
+
+ td->o.trim_percentage = *val;
+ return 0;
+}
+#endif
+
static int str_fst_cb(void *data, const char *str)
{
struct thread_data *td = data;
.type = FIO_OPT_INT,
.off1 = td_var_offset(verify_batch),
.help = "Verify this number of IO blocks",
- .parent = "verify_backlog",
+ .parent = "verify",
},
#ifdef FIO_HAVE_CPU_AFFINITY
{
.help = "Set CPUs allowed for async verify threads",
.parent = "verify_async",
},
+#endif
+#ifdef FIO_HAVE_TRIM
+ {
+ .name = "trim_percentage",
+ .type = FIO_OPT_INT,
+ .cb = str_verify_trim_cb,
+ .maxval = 100,
+ .help = "Number of verify blocks to discard/trim",
+ .parent = "verify",
+ .def = "0",
+ },
+ {
+ .name = "trim_verify_zero",
+ .type = FIO_OPT_INT,
+ .help = "Verify that trim/discarded blocks are returned as zeroes",
+ .off1 = td_var_offset(trim_zero),
+ .parent = "trim_percentage",
+ .def = "1",
+ },
+ {
+ .name = "trim_backlog",
+ .type = FIO_OPT_STR_VAL,
+ .off1 = td_var_offset(trim_backlog),
+ .help = "Trim after this number of blocks are written",
+ .parent = "trim_percentage",
+ },
+ {
+ .name = "trim_backlog_batch",
+ .type = FIO_OPT_INT,
+ .off1 = td_var_offset(trim_batch),
+ .help = "Trim this number of IO blocks",
+ .parent = "trim_percentage",
+ },
#endif
{
.name = "write_iolog",
io_u_dist[1], io_u_dist[2],
io_u_dist[3], io_u_dist[4],
io_u_dist[5], io_u_dist[6]);
- log_info(" issued r/w: total=%lu/%lu, short=%lu/%lu\n",
+ log_info(" issued r/w/d: total=%lu/%lu/%lu, short=%lu/%lu/%lu\n",
ts->total_io_u[0], ts->total_io_u[1],
- ts->short_io_u[0], ts->short_io_u[1]);
+ ts->total_io_u[2],
+ ts->short_io_u[0], ts->short_io_u[1],
+ ts->short_io_u[2]);
stat_calc_lat_u(ts, io_u_lat_u);
stat_calc_lat_m(ts, io_u_lat_m);
show_latencies(io_u_lat_u, io_u_lat_m);
ts->io_u_lat_m[k] += td->ts.io_u_lat_m[k];
- for (k = 0; k <= DDIR_WRITE; k++) {
+ for (k = 0; k <= 2; k++) {
ts->total_io_u[k] += td->ts.total_io_u[k];
ts->short_io_u[k] += td->ts.short_io_u[k];
}
--- /dev/null
+/*
+ * TRIM/DISCARD support
+ */
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+#include <assert.h>
+#include <pthread.h>
+
+#include "fio.h"
+#include "trim.h"
+
+#ifdef FIO_HAVE_TRIM
+/*
+ * Set up io_u as a trim of the first entry on td->trim_list.
+ *
+ * Returns 0 when io_u is ready to be issued (it was either already filled
+ * in by a requeue, or it now describes a trim), and 1 when no trim could
+ * be set up: the trim list is empty or the target file failed to open.
+ */
+int get_next_trim(struct thread_data *td, struct io_u *io_u)
+{
+	struct io_piece *ipo;
+
+	/*
+	 * this io_u is from a requeue, we already filled the offsets
+	 */
+	if (io_u->file)
+		return 0;
+
+	/*
+	 * Nothing queued for trim. This must not be reported as success:
+	 * a 0 return tells the caller the io_u is set up, and io_u->file
+	 * is still unset at this point.
+	 */
+	if (flist_empty(&td->trim_list))
+		return 1;
+
+	assert(td->trim_entries);
+	ipo = flist_entry(td->trim_list.next, struct io_piece, trim_list);
+	remove_trim_entry(td, ipo);
+
+	/*
+	 * Copy out what we need before ipo is possibly freed below
+	 */
+	io_u->offset = ipo->offset;
+	io_u->buflen = ipo->len;
+	io_u->file = ipo->file;
+
+	/*
+	 * If not verifying that trimmed ranges return zeroed data,
+	 * remove this from the to-read verify lists and release it.
+	 * Otherwise the piece stays queued for the later verify read,
+	 * so it must not be freed here; only tag it as trimmed.
+	 */
+	if (!td->o.trim_zero) {
+		if (ipo->flags & IP_F_ONLIST)
+			flist_del(&ipo->list);
+		else {
+			assert(ipo->flags & IP_F_ONRB);
+			rb_erase(&ipo->rb_node, &td->io_hist_tree);
+		}
+		td->io_hist_len--;
+		free(ipo);
+	} else
+		ipo->flags |= IP_F_TRIMMED;
+
+	if (!fio_file_open(io_u->file)) {
+		int r = td_io_open_file(td, io_u->file);
+
+		if (r) {
+			dprint(FD_VERIFY, "failed file %s open\n",
+					io_u->file->file_name);
+			return 1;
+		}
+	}
+
+	/*
+	 * ipo may be gone by now, so take the reference through io_u
+	 */
+	get_file(io_u->file);
+	assert(fio_file_open(io_u->file));
+	io_u->ddir = DDIR_TRIM;
+	io_u->xfer_buf = NULL;
+	io_u->xfer_buflen = io_u->buflen;
+
+	dprint(FD_VERIFY, "get_next_trim: ret io_u %p\n", io_u);
+	return 0;
+}
+
+/*
+ * Randomly decide whether a block should be queued for trimming.
+ * Returns 0 when trim_percentage is unset. Otherwise draws a value from
+ * td->trim_state and returns non-zero when it is at or below
+ * trim_percentage hundredths of OS_RAND_MAX. io_u is not used.
+ */
+int io_u_should_trim(struct thread_data *td, struct io_u *io_u)
+{
+ unsigned long long val;
+ long r;
+
+ if (!td->o.trim_percentage)
+ return 0;
+
+ r = os_random_long(&td->trim_state);
+ val = (OS_RAND_MAX / 100ULL);
+ val *= (unsigned long long) td->o.trim_percentage;
+
+ return r <= val;
+}
+#endif
--- /dev/null
+#ifndef FIO_TRIM_H
+#define FIO_TRIM_H
+
+#include "fio.h"
+
+#ifdef FIO_HAVE_TRIM
+extern int __must_check get_next_trim(struct thread_data *td, struct io_u *io_u);
+extern int io_u_should_trim(struct thread_data *td, struct io_u *io_u);
+
+/*
+ * Unlink ipo from the trim list if it is currently on one, and drop
+ * td->trim_entries to match. Does nothing when ipo's trim_list is empty.
+ */
+static inline void remove_trim_entry(struct thread_data *td, struct io_piece *ipo)
+{
+ if (!flist_empty(&ipo->trim_list)) {
+ flist_del_init(&ipo->trim_list);
+ td->trim_entries--;
+ }
+}
+
+#else
+/*
+ * Stubs for builds without FIO_HAVE_TRIM: get_next_trim() never sets up a
+ * trim (returns 1), io_u_should_trim() never selects a block (returns 0),
+ * and remove_trim_entry() has nothing to unlink.
+ */
+static inline int get_next_trim(struct thread_data *td, struct io_u *io_u)
+{
+	return 1;
+}
+static inline int io_u_should_trim(struct thread_data *td, struct io_u *io_u)
+{
+	return 0;
+}
+static inline void remove_trim_entry(struct thread_data *td, struct io_piece *ipo)
+{
+}
+#endif
+
+#endif
#include "fio.h"
#include "verify.h"
#include "smalloc.h"
+#include "trim.h"
#include "lib/rand.h"
#include "crc/md5.h"
return 0;
}
+/*
+ * Check that a block read back after a trim contains only zeroes.
+ *
+ * Returns 0 when the trim_zero option is off or every byte of the buffer
+ * is zero; otherwise logs the failing file/offset/length and returns EILSEQ.
+ */
+static int verify_trimmed_io_u(struct thread_data *td, struct io_u *io_u)
+{
+	/* static storage, so this is an all-zero reference block */
+	static const char zero_buf[1024];
+	unsigned int this_len, len;
+	const char *p;
+	int ret = 0;
+
+	if (!td->o.trim_zero)
+		return 0;
+
+	len = io_u->buflen;
+	/* byte pointer: arithmetic on void * is not standard C */
+	p = io_u->buf;
+	do {
+		/* compare at most one reference block per pass */
+		this_len = sizeof(zero_buf);
+		if (this_len > len)
+			this_len = len;
+		if (memcmp(p, zero_buf, this_len)) {
+			ret = EILSEQ;
+			break;
+		}
+		len -= this_len;
+		p += this_len;
+	} while (len);
+
+	if (!ret)
+		return 0;
+
+	log_err("trims: verify failed at file %s offset %llu, length %lu\n",
+			io_u->file->file_name, io_u->offset, io_u->buflen);
+	return ret;
+}
+
int verify_io_u(struct thread_data *td, struct io_u *io_u)
{
struct verify_header *hdr;
if (td->o.verify == VERIFY_NULL || io_u->ddir != DDIR_READ)
return 0;
+ if (io_u->flags & IO_U_F_TRIMMED) {
+ ret = verify_trimmed_io_u(td, io_u);
+ goto done;
+ }
hdr_inc = io_u->buflen;
if (td->o.verify_interval)
}
}
+done:
if (ret && td->o.verify_fatal)
td->terminate = 1;
ipo = rb_entry(n, struct io_piece, rb_node);
rb_erase(n, &td->io_hist_tree);
- td->io_hist_len--;
} else if (!flist_empty(&td->io_hist_list)) {
ipo = flist_entry(td->io_hist_list.next, struct io_piece, list);
- td->io_hist_len--;
flist_del(&ipo->list);
}
if (ipo) {
+ td->io_hist_len--;
+
io_u->offset = ipo->offset;
io_u->buflen = ipo->len;
io_u->file = ipo->file;
+ if (ipo->flags & IP_F_TRIMMED)
+ io_u->flags |= IO_U_F_TRIMMED;
+
if (!fio_file_open(io_u->file)) {
int r = td_io_open_file(td, io_u->file);
io_u->ddir = DDIR_READ;
io_u->xfer_buf = io_u->buf;
io_u->xfer_buflen = io_u->buflen;
+
+ remove_trim_entry(td, ipo);
free(ipo);
dprint(FD_VERIFY, "get_next_verify: ret io_u %p\n", io_u);
return 0;