summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com> 2021-09-30 09:02:36 +0900
committer: Jens Axboe <axboe@kernel.dk> 2021-09-30 10:05:23 -0600
commit: 0f77c977ab44a10d69268546a849376efc327d47 (patch)
tree: dd36ead146151ceb9c581fc3185853bc5d676369
parent: 203e4c2624493c0db8c69c9ad830090c5b79be67 (diff)
downloadfio-0f77c977ab44a10d69268546a849376efc327d47.tar.gz
fio-0f77c977ab44a10d69268546a849376efc327d47.tar.bz2
zbd: Fix unexpected job termination by open zone search failure
Test case #46 in t/zbd/test-zbd-support fails when it is repeated hundreds of times on null_blk zoned devices. The test case uses the libaio I/O engine to run 8 random write jobs on 4 sequential-write-required zones. When all 4 zones are almost full but still open for in-flight writes, the helper function zbd_convert_to_open_zone() fails to get an open zone for the next write. This results in unexpected job termination.

To avoid the unexpected job termination, retry the steps in zbd_convert_to_open_zone(). Before retrying, call io_u_quiesce() to ensure that the in-flight writes are completed. To prevent an infinite loop caused by the retry, retry only when any I/Os are in flight or in-flight I/Os have completed. To check the in-flight I/O count of all jobs, add a new helper function any_io_in_flight().

Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Reviewed-by: Niklas Cassel <niklas.cassel@wdc.com>
Reviewed-by: Dmitry Fomichev <dmitry.fomichev@wdc.com>
Link: https://lore.kernel.org/r/20210930000236.4116945-1-shinichiro.kawasaki@wdc.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r--zbd.c34
1 files changed, 34 insertions, 0 deletions
diff --git a/zbd.c b/zbd.c
index 64415d2b..c0b0b81c 100644
--- a/zbd.c
+++ b/zbd.c
@@ -1204,6 +1204,19 @@ static uint32_t pick_random_zone_idx(const struct fio_file *f,
f->io_size;
}
+static bool any_io_in_flight(void) /* true if at least one job has I/O in flight, else false */
+{
+ struct thread_data *td;
+ int i;
+ /* for_each_td() is defined elsewhere; presumably it visits every job's thread_data -- confirm */
+ for_each_td(td, i) {
+ if (td->io_u_in_flight) /* nonzero in-flight I/O count for this job */
+ return true; /* one busy job is enough, so stop scanning */
+ }
+ /* the scan finished without finding a job with in-flight I/O */
+ return false;
+}
+
/*
* Modify the offset of an I/O unit that does not refer to an open zone such
* that it refers to an open zone. Close an open zone and open a new zone if
@@ -1223,6 +1236,8 @@ static struct fio_zone_info *zbd_convert_to_open_zone(struct thread_data *td,
uint32_t zone_idx, new_zone_idx;
int i;
bool wait_zone_close;
+ bool in_flight;
+ bool should_retry = true;
assert(is_valid_offset(f, io_u->offset));
@@ -1337,6 +1352,7 @@ open_other_zone:
io_u_quiesce(td);
}
+retry:
/* Zone 'z' is full, so try to open a new zone. */
for (i = f->io_size / zbdi->zone_size; i > 0; i--) {
zone_idx++;
@@ -1376,6 +1392,24 @@ open_other_zone:
goto out;
pthread_mutex_lock(&zbdi->mutex);
}
+
+ /*
+ * If any I/O is in flight, or in-flight I/O has since completed, zones
+ * may have been closed by it, so retry the steps to open a zone. Before
+ * retrying, call io_u_quiesce() to complete in-flight writes.
+ */
+ in_flight = any_io_in_flight();
+ if (in_flight || should_retry) {
+ dprint(FD_ZBD, "%s(%s): wait zone close and retry open zones\n",
+ __func__, f->file_name);
+ pthread_mutex_unlock(&zbdi->mutex);
+ zone_unlock(z);
+ io_u_quiesce(td);
+ zone_lock(td, f, z);
+ should_retry = in_flight;
+ goto retry;
+ }
+
pthread_mutex_unlock(&zbdi->mutex);
zone_unlock(z);
dprint(FD_ZBD, "%s(%s): did not open another zone\n", __func__,