ret = blkzoned_report_zones(td, f, offset, zones, nr_zones);
if (ret < 0) {
td_verror(td, errno, "report zones failed");
- log_err("%s: report zones from sector %llu failed (%d).\n",
- f->file_name, (unsigned long long)offset >> 9, errno);
+ log_err("%s: report zones from sector %"PRIu64" failed (%d).\n",
+ f->file_name, offset >> 9, errno);
} else if (ret == 0) {
td_verror(td, errno, "Empty zone report");
- log_err("%s: report zones from sector %llu is empty.\n",
- f->file_name, (unsigned long long)offset >> 9);
+ log_err("%s: report zones from sector %"PRIu64" is empty.\n",
+ f->file_name, offset >> 9);
ret = -EIO;
}
ret = blkzoned_reset_wp(td, f, offset, length);
if (ret < 0) {
td_verror(td, errno, "resetting wp failed");
- log_err("%s: resetting wp for %llu sectors at sector %llu failed (%d).\n",
- f->file_name, (unsigned long long)length >> 9,
- (unsigned long long)offset >> 9, errno);
+ log_err("%s: resetting wp for %"PRIu64" sectors at sector %"PRIu64" failed (%d).\n",
+ f->file_name, length >> 9, offset >> 9, errno);
}
return ret;
return false;
}
} else if (td->o.zone_size != f->zbd_info->zone_size) {
- log_err("%s: job parameter zonesize %llu does not match disk zone size %llu.\n",
- f->file_name, (unsigned long long) td->o.zone_size,
- (unsigned long long) f->zbd_info->zone_size);
+ log_err("%s: job parameter zonesize %llu does not match disk zone size %"PRIu64".\n",
+ f->file_name, td->o.zone_size,
+ f->zbd_info->zone_size);
return false;
}
if (td->o.zone_skip % td->o.zone_size) {
log_err("%s: zoneskip %llu is not a multiple of the device zone size %llu.\n",
- f->file_name, (unsigned long long) td->o.zone_skip,
- (unsigned long long) td->o.zone_size);
+ f->file_name, td->o.zone_skip,
+ td->o.zone_size);
return false;
}
f->file_name);
return false;
}
- log_info("%s: rounded up offset from %llu to %llu\n",
- f->file_name, (unsigned long long) f->file_offset,
- (unsigned long long) new_offset);
+ log_info("%s: rounded up offset from %"PRIu64" to %"PRIu64"\n",
+ f->file_name, f->file_offset,
+ new_offset);
f->io_size -= (new_offset - f->file_offset);
f->file_offset = new_offset;
}
f->file_name);
return false;
}
- log_info("%s: rounded down io_size from %llu to %llu\n",
- f->file_name, (unsigned long long) f->io_size,
- (unsigned long long) new_end - f->file_offset);
+ log_info("%s: rounded down io_size from %"PRIu64" to %"PRIu64"\n",
+ f->file_name, f->io_size,
+ new_end - f->file_offset);
f->io_size = new_end - f->file_offset;
}
}
int i, j, k;
for_each_td(td, i) {
+ if (td_trim(td) &&
+ (td->o.min_bs[DDIR_TRIM] != td->o.max_bs[DDIR_TRIM] ||
+ td->o.bssplit_nr[DDIR_TRIM])) {
+ log_info("bsrange and bssplit are not allowed for trim with zonemode=zbd\n");
+ return false;
+ }
for_each_file(td, f, j) {
uint64_t zone_size;
if (!f->zbd_info)
continue;
zone_size = f->zbd_info->zone_size;
+ if (td_trim(td) && td->o.bs[DDIR_TRIM] != zone_size) {
+ log_info("%s: trim block size %llu is not the zone size %"PRIu64"\n",
+ f->file_name, td->o.bs[DDIR_TRIM],
+ zone_size);
+ return false;
+ }
for (k = 0; k < FIO_ARRAY_SIZE(td->o.bs); k++) {
if (td->o.verify != VERIFY_NONE &&
zone_size % td->o.bs[k] != 0) {
- log_info("%s: block size %llu is not a divisor of the zone size %llu\n",
+ log_info("%s: block size %llu is not a divisor of the zone size %"PRIu64"\n",
f->file_name, td->o.bs[k],
- (unsigned long long)zone_size);
+ zone_size);
return false;
}
}
if (zone_capacity > zone_size) {
log_err("%s: job parameter zonecapacity %llu is larger than zone size %llu\n",
- f->file_name, (unsigned long long) td->o.zone_capacity,
- (unsigned long long) td->o.zone_size);
+ f->file_name, td->o.zone_capacity, td->o.zone_size);
return 1;
}
if (td->o.zone_size == 0) {
td->o.zone_size = zone_size;
} else if (td->o.zone_size != zone_size) {
- log_err("fio: %s job parameter zonesize %llu does not match disk zone size %llu.\n",
- f->file_name, (unsigned long long) td->o.zone_size,
- (unsigned long long) zone_size);
+ log_err("fio: %s job parameter zonesize %llu does not match disk zone size %"PRIu64".\n",
+ f->file_name, td->o.zone_size, zone_size);
ret = -EINVAL;
goto out;
}
- dprint(FD_ZBD, "Device %s has %d zones of size %llu KB\n", f->file_name,
- nr_zones, (unsigned long long) zone_size / 1024);
+ dprint(FD_ZBD, "Device %s has %d zones of size %"PRIu64" KB\n", f->file_name,
+ nr_zones, zone_size / 1024);
zbd_info = scalloc(1, sizeof(*zbd_info) +
(nr_zones + 1) * sizeof(zbd_info->zone_info[0]));
ZBD_REPORT_MAX_ZONES));
if (nrz < 0) {
ret = nrz;
- log_info("fio: report zones (offset %llu) failed for %s (%d).\n",
- (unsigned long long)offset,
- f->file_name, -ret);
+ log_info("fio: report zones (offset %"PRIu64") failed for %s (%d).\n",
+ offset, f->file_name, -ret);
goto out;
}
}
struct fio_zone_info *const ze)
{
struct fio_zone_info *z;
- const uint32_t min_bs = td->o.min_bs[DDIR_WRITE];
+ const uint64_t min_bs = td->o.min_bs[DDIR_WRITE];
int res = 0;
assert(min_bs);
static bool zbd_open_zone(struct thread_data *td, const struct fio_file *f,
uint32_t zone_idx)
{
- const uint32_t min_bs = td->o.min_bs[DDIR_WRITE];
+ const uint64_t min_bs = td->o.min_bs[DDIR_WRITE];
struct zoned_block_device_info *zbdi = f->zbd_info;
struct fio_zone_info *z = get_zone(f, zone_idx);
bool res = true;
return res;
}
-/* Anything goes as long as it is not a constant. */
+/*
+ * Return random zone index for one of the open zones.
+ *
+ * The index is derived from the I/O offset: the offset relative to
+ * f->file_offset is multiplied by num_open_zones and divided by f->io_size,
+ * so the result is only as random as io_u->offset itself.
+ * NOTE(review): the result stays below num_open_zones only if the offset lies
+ * inside [file_offset, file_offset + io_size) and io_size is non-zero; confirm
+ * that callers guarantee both.
+ */
static uint32_t pick_random_zone_idx(const struct fio_file *f,
const struct io_u *io_u)
{
- return io_u->offset * f->zbd_info->num_open_zones / f->real_file_size;
+ return (io_u->offset - f->file_offset) * f->zbd_info->num_open_zones /
+ f->io_size;
+}
+/*
+ * Return true if at least one thread visited by for_each_td() has a non-zero
+ * io_u_in_flight value, false otherwise. The field is read without taking any
+ * lock in this function.
+ */
+static bool any_io_in_flight(void)
+{
+ struct thread_data *td;
+ int i;
+
+ for_each_td(td, i) {
+ if (td->io_u_in_flight)
+ return true;
+ }
+
+ return false;
}
/*
static struct fio_zone_info *zbd_convert_to_open_zone(struct thread_data *td,
struct io_u *io_u)
{
- const uint32_t min_bs = td->o.min_bs[io_u->ddir];
+ const uint64_t min_bs = td->o.min_bs[io_u->ddir];
struct fio_file *f = io_u->file;
struct zoned_block_device_info *zbdi = f->zbd_info;
struct fio_zone_info *z;
uint32_t zone_idx, new_zone_idx;
int i;
bool wait_zone_close;
+ bool in_flight;
+ bool should_retry = true;
assert(is_valid_offset(f, io_u->offset));
io_u_quiesce(td);
}
+retry:
/* Zone 'z' is full, so try to open a new zone. */
for (i = f->io_size / zbdi->zone_size; i > 0; i--) {
zone_idx++;
goto out;
pthread_mutex_lock(&zbdi->mutex);
}
+
+ /*
+ * I/Os that are still in flight, or that have just completed, may have
+ * closed zones. In either case, retry the steps to open a zone.
+ * Before retrying, call io_u_quiesce() to complete in-flight writes.
+ */
+ in_flight = any_io_in_flight();
+ if (in_flight || should_retry) {
+ dprint(FD_ZBD, "%s(%s): wait zone close and retry open zones\n",
+ __func__, f->file_name);
+ pthread_mutex_unlock(&zbdi->mutex);
+ zone_unlock(z);
+ io_u_quiesce(td);
+ zone_lock(td, f, z);
+ should_retry = in_flight;
+ goto retry;
+ }
+
pthread_mutex_unlock(&zbdi->mutex);
zone_unlock(z);
dprint(FD_ZBD, "%s(%s): did not open another zone\n", __func__,
struct fio_zone_info *z)
{
const struct fio_file *f = io_u->file;
- const uint32_t min_bs = td->o.min_bs[DDIR_WRITE];
+ const uint64_t min_bs = td->o.min_bs[DDIR_WRITE];
if (!zbd_open_zone(td, f, zbd_zone_nr(f, z))) {
zone_unlock(z);
}
if (z->verify_block * min_bs >= z->capacity) {
- log_err("%s: %d * %d >= %llu\n", f->file_name, z->verify_block,
- min_bs, (unsigned long long)z->capacity);
+ log_err("%s: %d * %"PRIu64" >= %"PRIu64"\n", f->file_name, z->verify_block,
+ min_bs, z->capacity);
/*
* If the assertion below fails during a test run, adding
* "--experimental_verify=1" to the command line may help.
}
io_u->offset = z->start + z->verify_block * min_bs;
if (io_u->offset + io_u->buflen >= zbd_zone_capacity_end(z)) {
- log_err("%s: %llu + %llu >= %llu\n", f->file_name, io_u->offset,
- io_u->buflen, (unsigned long long) zbd_zone_capacity_end(z));
+ log_err("%s: %llu + %llu >= %"PRIu64"\n", f->file_name, io_u->offset,
+ io_u->buflen, zbd_zone_capacity_end(z));
assert(false);
}
z->verify_block += io_u->buflen / min_bs;
}
/*
- * Find another zone for which @io_u fits in the readable data in the zone.
- * Search in zones @zb + 1 .. @zl. For random workload, also search in zones
- * @zb - 1 .. @zf.
+ * Find another zone which has @min_bytes of readable data. Search in zones
+ * @zb + 1 .. @zl. For random workload, also search in zones @zb - 1 .. @zf.
*
* Either returns NULL or returns a zone pointer. When the zone has write
* pointer, hold the mutex for the zone.
*/
static struct fio_zone_info *
-zbd_find_zone(struct thread_data *td, struct io_u *io_u,
+zbd_find_zone(struct thread_data *td, struct io_u *io_u, uint64_t min_bytes,
struct fio_zone_info *zb, struct fio_zone_info *zl)
{
- const uint32_t min_bs = td->o.min_bs[io_u->ddir];
struct fio_file *f = io_u->file;
struct fio_zone_info *z1, *z2;
const struct fio_zone_info *const zf = get_zone(f, f->min_zone);
if (z1 < zl && z1->cond != ZBD_ZONE_COND_OFFLINE) {
if (z1->has_wp)
zone_lock(td, f, z1);
- if (z1->start + min_bs <= z1->wp)
+ if (z1->start + min_bytes <= z1->wp)
return z1;
if (z1->has_wp)
zone_unlock(z1);
z2->cond != ZBD_ZONE_COND_OFFLINE) {
if (z2->has_wp)
zone_lock(td, f, z2);
- if (z2->start + min_bs <= z2->wp)
+ if (z2->start + min_bytes <= z2->wp)
return z2;
if (z2->has_wp)
zone_unlock(z2);
}
}
- dprint(FD_ZBD, "%s: adjusting random read offset failed\n",
- f->file_name);
+ dprint(FD_ZBD, "%s: no zone has %"PRIu64" bytes of readable data\n",
+ f->file_name, min_bytes);
return NULL;
}
pthread_mutex_unlock(&zbd_info->mutex);
z->wp = zone_end;
break;
- case DDIR_TRIM:
- assert(z->wp == z->start);
- break;
default:
break;
}
f->last_pos[ddir] >= zbd_zone_capacity_end(z)) {
dprint(FD_ZBD,
"%s: Jump from zone capacity limit to zone end:"
- " (%llu -> %llu) for zone %u (%llu)\n",
- f->file_name, (unsigned long long) f->last_pos[ddir],
- (unsigned long long) zbd_zone_end(z), zone_idx,
- (unsigned long long) z->capacity);
+ " (%"PRIu64" -> %"PRIu64") for zone %u (%"PRIu64")\n",
+ f->file_name, f->last_pos[ddir],
+ zbd_zone_end(z), zone_idx, z->capacity);
td->io_skip_bytes += zbd_zone_end(z) - f->last_pos[ddir];
f->last_pos[ddir] = zbd_zone_end(z);
}
uint32_t zone_idx_b;
struct fio_zone_info *zb, *zl, *orig_zb;
uint32_t orig_len = io_u->buflen;
- uint32_t min_bs = td->o.min_bs[io_u->ddir];
+ uint64_t min_bs = td->o.min_bs[io_u->ddir];
uint64_t new_len;
int64_t range;
if (io_u->offset + min_bs > (zb + 1)->start) {
dprint(FD_IO,
- "%s: off=%llu + min_bs=%u > next zone %llu\n",
+ "%s: off=%llu + min_bs=%"PRIu64" > next zone %"PRIu64"\n",
f->file_name, io_u->offset,
- min_bs, (unsigned long long) (zb + 1)->start);
+ min_bs, (zb + 1)->start);
io_u->offset = zb->start + (zb + 1)->start - io_u->offset;
new_len = min(io_u->buflen, (zb + 1)->start - io_u->offset);
} else {
((!td_random(td)) && (io_u->offset + min_bs > zb->wp))) {
zone_unlock(zb);
zl = get_zone(f, f->max_zone);
- zb = zbd_find_zone(td, io_u, zb, zl);
+ zb = zbd_find_zone(td, io_u, min_bs, zb, zl);
if (!zb) {
dprint(FD_ZBD,
"%s: zbd_find_zone(%lld, %llu) failed\n",
if (io_u->buflen > zbdi->zone_size) {
td_verror(td, EINVAL, "I/O buflen exceeds zone size");
dprint(FD_IO,
- "%s: I/O buflen %llu exceeds zone size %llu\n",
- f->file_name, io_u->buflen,
- (unsigned long long) zbdi->zone_size);
+ "%s: I/O buflen %llu exceeds zone size %"PRIu64"\n",
+ f->file_name, io_u->buflen, zbdi->zone_size);
goto eof;
}
if (!zbd_open_zone(td, f, zone_idx_b)) {
f->file_name);
goto eof;
}
- zone_idx_b = zbd_zone_nr(f, zb);
}
/* Check whether the zone reset threshold has been exceeded */
if (td->o.zrf.u.f) {
if (zb->capacity < min_bs) {
td_verror(td, EINVAL, "ZCAP is less min_bs");
- log_err("zone capacity %llu smaller than minimum block size %d\n",
- (unsigned long long)zb->capacity,
- min_bs);
+ log_err("zone capacity %"PRIu64" smaller than minimum block size %"PRIu64"\n",
+ zb->capacity, min_bs);
goto eof;
}
}
goto accept;
}
td_verror(td, EIO, "zone remainder too small");
- log_err("zone remainder %lld smaller than min block size %d\n",
+ log_err("zone remainder %lld smaller than min block size %"PRIu64"\n",
(zbd_zone_capacity_end(zb) - io_u->offset), min_bs);
goto eof;
case DDIR_TRIM:
- /* fall-through */
+ /* Check random trim targets a non-empty zone */
+ if (!td_random(td) || zb->wp > zb->start)
+ goto accept;
+
+ /* Find out a non-empty zone to trim */
+ zone_unlock(zb);
+ zl = get_zone(f, f->max_zone);
+ zb = zbd_find_zone(td, io_u, 1, zb, zl);
+ if (zb) {
+ io_u->offset = zb->start;
+ dprint(FD_ZBD, "%s: found new zone(%lld) for trim\n",
+ f->file_name, io_u->offset);
+ goto accept;
+ }
+ goto eof;
case DDIR_SYNC:
+ /* fall-through */
case DDIR_DATASYNC:
case DDIR_SYNC_FILE_RANGE:
case DDIR_WAIT:
{
char *res;
- if (asprintf(&res, "; %llu zone resets", (unsigned long long) ts->nr_zone_resets) < 0)
+ if (asprintf(&res, "; %"PRIu64" zone resets", ts->nr_zone_resets) < 0)
return NULL;
return res;
}
+
+/**
+ * zbd_do_io_u_trim - If reset zone is applicable, do reset zone instead of trim
+ *
+ * @td: FIO thread data.
+ * @io_u: FIO I/O unit.
+ *
+ * The target zone is the one that contains io_u->offset. Only a trim that
+ * starts exactly at the start of that zone is turned into a zone reset.
+ *
+ * It is assumed that z->mutex is already locked.
+ * Return io_u_completed when reset zone succeeds. Return 0 when the target zone
+ * does not have write pointer. Return -EINVAL when the trim offset is not the
+ * zone start, or the negative value returned by zbd_reset_zone() when the
+ * reset fails.
+ */
+int zbd_do_io_u_trim(const struct thread_data *td, struct io_u *io_u)
+{
+	struct fio_file *f = io_u->file;
+	struct fio_zone_info *z;
+	uint32_t zone_idx;
+	int ret;
+
+	zone_idx = zbd_zone_idx(f, io_u->offset);
+	z = get_zone(f, zone_idx);
+
+	/* Nothing to reset for a zone without write pointer. */
+	if (!z->has_wp)
+		return 0;
+
+	if (io_u->offset != z->start) {
+		/*
+		 * io_u->offset is printed with %llu elsewhere in this file,
+		 * so use the unsigned conversion here as well.
+		 */
+		log_err("Trim offset not at zone start (%llu)\n", io_u->offset);
+		return -EINVAL;
+	}
+
+	/* zbd_reset_zone() is called with a non-const td, hence the cast. */
+	ret = zbd_reset_zone((struct thread_data *)td, f, z);
+	if (ret < 0)
+		return ret;
+
+	return io_u_completed;
+}