When asynchronous write workloads have the verify option and the
verify_state_save option enabled, fio saves the last write completions
in the verify state file. The number of writes saved in the verify state
file is equal to the I/O depth specified by the iodepth option. Let N
represent the iodepth. Subsequent verify workloads with the
verify_state_load option read the saved verify state file. As to the
last N verify I/Os, fio checks if they have corresponding write
completions saved by the write workloads. If not, fio does not perform
the verify I/Os since the verify data was not written. This approach
prevents false-positive verify failures. Refer to the following two
commits for the details:
ca09be4b1a8e ("Add support for verify triggers and verify state saving")
94a6e1bb4e7d ("Fix verify state for multiple files")
However, when the write workloads are asynchronous, the completion order
of writes can differ from their issue order. In such cases, a write
named "W_before_last_N", issued before the last N writes, may complete
slowly, fall within the last N completions, and be saved in the verify
state file. Conversely, one of the last issued N writes, named
"W_in_last_N", may complete early, and not be saved in the verify state
file. When the subsequent verify workload reads the verify state file
and runs, fio prepares the I/O for the "W_in_last_N" at some point. Fio
tries to find its offset in the verify state file, but that is not
found. Then fio stops the verify workload. This unexpected verify stop
confuses users.
To reduce the chance of an unexpected verify stop due to fluctuations
in the write completion order, increase the number of write completions
saved in the verify state file. Since this issue only occurs with
asynchronous writes, increase the number only for asynchronous
workloads. Add a new field "last_write_comp_depth" to struct thread_data
to store the number. To adjust the size of the verify state file, add a
new field "max_no_comps_per_file" to struct thread_io_list. This field
reflects the number of writes to be saved for each file and allows
calculating the state file size. These changes affect the verify state
file format, so bump the verify state header version from 3 to 4.
Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Link: https://lore.kernel.org/r/20250213052510.1474423-3-shinichiro.kawasaki@wdc.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
if (td->o.verify == VERIFY_NONE || !td->o.verify_state_save)
return 0;
+ /*
+ * Async IO completion order may be different from issue order. Double
+ * the number of write completions to cover the case where writes issued
+ * earlier complete slowly and fall within the last write log entries.
+ */
+ td->last_write_comp_depth = depth;
+ if (!td_ioengine_flagged(td, FIO_SYNCIO))
+ td->last_write_comp_depth += depth;
+
for_each_file(td, f, i) {
- f->last_write_comp = scalloc(depth, sizeof(uint64_t));
+ f->last_write_comp = scalloc(td->last_write_comp_depth,
+ sizeof(uint64_t));
if (!f->last_write_comp)
goto cleanup;
}
struct fio_sem *sem;
uint64_t bytes_done[DDIR_RWDIR_CNT];
uint64_t bytes_verified;
+ uint32_t last_write_comp_depth;
uint64_t *thinktime_blocks_counter;
struct timespec last_thinktime;
idx = f->last_write_idx++;
f->last_write_comp[idx] = offset;
- if (f->last_write_idx == td->o.iodepth)
+ if (f->last_write_idx == td->last_write_comp_depth)
f->last_write_idx = 0;
}
};
struct thread_io_list {
- uint64_t no_comps;
- uint32_t depth;
+ uint64_t no_comps; /* Number of completions saved for the thread */
+ uint32_t depth; /* I/O depth of the job that saves the verify state */
+ uint32_t max_no_comps_per_file;
uint32_t nofiles;
uint64_t numberio;
uint64_t index;
struct thread_io_list state[0];
};
-#define VSTATE_HDR_VERSION 0x03
+#define VSTATE_HDR_VERSION 0x04
struct verify_state_hdr {
uint64_t version;
static inline size_t thread_io_list_sz(struct thread_io_list *s)
{
- return __thread_io_list_sz(le32_to_cpu(s->depth), le32_to_cpu(s->nofiles));
+ return __thread_io_list_sz(le32_to_cpu(s->max_no_comps_per_file), le32_to_cpu(s->nofiles));
}
static inline struct thread_io_list *io_list_next(struct thread_io_list *s)
if (!f->last_write_comp)
return 0;
- if (td->io_blocks[DDIR_WRITE] < td->o.iodepth)
+ if (td->io_blocks[DDIR_WRITE] < td->last_write_comp_depth)
comps = td->io_blocks[DDIR_WRITE];
else
- comps = td->o.iodepth;
+ comps = td->last_write_comp_depth;
j = f->last_write_idx - 1;
for (i = 0; i < comps; i++) {
if (j == -1)
- j = td->o.iodepth - 1;
+ j = td->last_write_comp_depth - 1;
s->comps[*index].fileno = __cpu_to_le64(f->fileno);
s->comps[*index].offset = cpu_to_le64(f->last_write_comp[j]);
(*index)++;
continue;
td->stop_io = 1;
td->flags |= TD_F_VSTATE_SAVED;
- depth += (td->o.iodepth * td->o.nr_files);
+ depth += (td->last_write_comp_depth * td->o.nr_files);
nr++;
} end_for_each();
s->no_comps = cpu_to_le64((uint64_t) comps);
s->depth = cpu_to_le32((uint32_t) td->o.iodepth);
+ s->max_no_comps_per_file = cpu_to_le32((uint32_t) td->last_write_comp_depth);
s->nofiles = cpu_to_le32((uint32_t) td->o.nr_files);
s->numberio = cpu_to_le64((uint64_t) td->io_issues[DDIR_WRITE]);
s->index = cpu_to_le64((uint64_t) __td_index);
s->no_comps = le64_to_cpu(s->no_comps);
s->depth = le32_to_cpu(s->depth);
+ s->max_no_comps_per_file = le32_to_cpu(s->max_no_comps_per_file);
s->nofiles = le32_to_cpu(s->nofiles);
s->numberio = le64_to_cpu(s->numberio);
s->rand.use64 = le64_to_cpu(s->rand.use64);