X-Git-Url: https://git.kernel.dk/?a=blobdiff_plain;f=zbd.c;h=99310c49043b8cf79ca63b51689ecd6817eab99f;hb=b10b1e70afaff8c9b00005e9238f2ad347a9c00a;hp=310b1732749702e551f475d1af4dd0f3b57c60dc;hpb=d9ed3e63e5280899e2e0bc7e55124c2a1acb30ca;p=fio.git

diff --git a/zbd.c b/zbd.c
index 310b1732..99310c49 100644
--- a/zbd.c
+++ b/zbd.c
@@ -119,6 +119,30 @@ static bool zbd_verify_sizes(void)
 			continue;
 		if (!zbd_is_seq_job(f))
 			continue;
+
+		if (!td->o.zone_size) {
+			td->o.zone_size = f->zbd_info->zone_size;
+			if (!td->o.zone_size) {
+				log_err("%s: invalid 0 zone size\n",
+					f->file_name);
+				return false;
+			}
+		} else if (td->o.zone_size != f->zbd_info->zone_size) {
+			log_err("%s: job parameter zonesize %llu does not match disk zone size %llu.\n",
+				f->file_name, (unsigned long long) td->o.zone_size,
+				(unsigned long long) f->zbd_info->zone_size);
+			return false;
+		}
+
+		if (td->o.zone_skip &&
+		    (td->o.zone_skip < td->o.zone_size ||
+		     td->o.zone_skip % td->o.zone_size)) {
+			log_err("%s: zoneskip %llu is not a multiple of the device zone size %llu.\n",
+				f->file_name, (unsigned long long) td->o.zone_skip,
+				(unsigned long long) td->o.zone_size);
+			return false;
+		}
+
 		zone_idx = zbd_zone_idx(f, f->file_offset);
 		z = &f->zbd_info->zone_info[zone_idx];
 		if (f->file_offset != z->start) {
@@ -186,11 +210,14 @@ static bool zbd_verify_bs(void)
  * size of @buf.
  *
  * Returns 0 upon success and a negative error code upon failure.
+ * If the zone report is empty, always assume an error (device problem) and
+ * return -EIO.
  */
 static int read_zone_info(int fd, uint64_t start_sector,
 			  void *buf, unsigned int bufsz)
 {
 	struct blk_zone_report *hdr = buf;
+	int ret;
 
 	if (bufsz < sizeof(*hdr))
 		return -EINVAL;
@@ -199,7 +226,12 @@ static int read_zone_info(int fd, uint64_t start_sector,
 
 	hdr->nr_zones = (bufsz - sizeof(*hdr)) / sizeof(struct blk_zone);
 	hdr->sector = start_sector;
-	return ioctl(fd, BLKREPORTZONE, hdr) >= 0 ? 0 : -errno;
+	ret = ioctl(fd, BLKREPORTZONE, hdr);
+	if (ret)
+		return -errno;
+	if (!hdr->nr_zones)
+		return -EIO;
+	return 0;
 }
 
 /*
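For reference, the BLKREPORTZONE calling convention that read_zone_info() wraps can be exercised with a small standalone program. This is a hypothetical sketch, not part of the patch: the device path comes from argv and the 16-zone report buffer is an arbitrary size. It performs the same empty-report check the hunk above adds.

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/blkzoned.h>

int main(int argc, char **argv)
{
	struct blk_zone_report *hdr;
	size_t bufsz = sizeof(*hdr) + 16 * sizeof(struct blk_zone);
	unsigned int i;
	int fd;

	if (argc != 2)
		return 1;
	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	hdr = calloc(1, bufsz);
	if (!hdr)
		return 1;
	hdr->nr_zones = 16;	/* capacity of the report buffer */
	hdr->sector = 0;	/* report zones from the device start */
	if (ioctl(fd, BLKREPORTZONE, hdr)) {
		perror("BLKREPORTZONE");
		return 1;
	}
	if (!hdr->nr_zones) {	/* the empty-report case the patch maps to -EIO */
		fprintf(stderr, "empty zone report\n");
		return 1;
	}
	for (i = 0; i < hdr->nr_zones; i++)
		printf("zone %u: start %llu len %llu wp %llu cond %u\n", i,
		       (unsigned long long) hdr->zones[i].start,
		       (unsigned long long) hdr->zones[i].len,
		       (unsigned long long) hdr->zones[i].wp,
		       (unsigned int) hdr->zones[i].cond);
	free(hdr);
	return 0;
}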
@@ -304,13 +336,23 @@ static int init_zone_info(struct thread_data *td, struct fio_file *f)
 {
 	uint32_t nr_zones;
 	struct fio_zone_info *p;
-	uint64_t zone_size;
+	uint64_t zone_size = td->o.zone_size;
 	struct zoned_block_device_info *zbd_info = NULL;
 	pthread_mutexattr_t attr;
 	int i;
 
-	zone_size = td->o.zone_size;
-	assert(zone_size);
+	if (zone_size == 0) {
+		log_err("%s: Specifying the zone size is mandatory for regular block devices with --zonemode=zbd\n\n",
+			f->file_name);
+		return 1;
+	}
+
+	if (zone_size < 512) {
+		log_err("%s: zone size must be at least 512 bytes for --zonemode=zbd\n\n",
+			f->file_name);
+		return 1;
+	}
+
 	nr_zones = (f->real_file_size + zone_size - 1) / zone_size;
 	zbd_info = scalloc(1, sizeof(*zbd_info) +
 			   (nr_zones + 1) * sizeof(zbd_info->zone_info[0]));
@@ -393,8 +435,8 @@ static int parse_zone_info(struct thread_data *td, struct fio_file *f)
 	if (td->o.zone_size == 0) {
 		td->o.zone_size = zone_size;
 	} else if (td->o.zone_size != zone_size) {
-		log_info("fio: %s job parameter zonesize %llu does not match disk zone size %llu.\n",
-			 f->file_name, (unsigned long long) td->o.zone_size,
+		log_err("fio: %s job parameter zonesize %llu does not match disk zone size %llu.\n",
+			f->file_name, (unsigned long long) td->o.zone_size,
 			(unsigned long long) zone_size);
 		ret = -EINVAL;
 		goto close;
@@ -418,8 +460,6 @@ static int parse_zone_info(struct thread_data *td, struct fio_file *f)
 		p->start = z->start << 9;
 		switch (z->cond) {
 		case BLK_ZONE_COND_NOT_WP:
-			p->wp = p->start;
-			break;
 		case BLK_ZONE_COND_FULL:
 			p->wp = p->start + zone_size;
 			break;
@@ -475,7 +515,7 @@ out:
  *
  * Returns 0 upon success and a negative error code upon failure.
  */
-int zbd_create_zone_info(struct thread_data *td, struct fio_file *f)
+static int zbd_create_zone_info(struct thread_data *td, struct fio_file *f)
 {
 	enum blk_zoned_model zbd_model;
 	int ret = 0;
@@ -543,7 +583,7 @@ static int zbd_init_zone_info(struct thread_data *td, struct fio_file *file)
 
 	ret = zbd_create_zone_info(td, file);
 	if (ret < 0)
-		td_verror(td, -ret, "BLKREPORTZONE failed");
+		td_verror(td, -ret, "zbd_create_zone_info() failed");
 	return ret;
 }
 
@@ -555,18 +595,8 @@ int zbd_init(struct thread_data *td)
 	for_each_file(td, f, i) {
 		if (f->filetype != FIO_TYPE_BLOCK)
 			continue;
-		if (td->o.zone_size && td->o.zone_size < 512) {
-			log_err("%s: zone size must be at least 512 bytes for --zonemode=zbd\n\n",
-				f->file_name);
+		if (zbd_init_zone_info(td, f))
 			return 1;
-		}
-		if (td->o.zone_size == 0 &&
-		    get_zbd_model(f->file_name) == ZBD_DM_NONE) {
-			log_err("%s: Specifying the zone size is mandatory for regular block devices with --zonemode=zbd\n\n",
-				f->file_name);
-			return 1;
-		}
-		zbd_init_zone_info(td, f);
 	}
 
 	if (!zbd_using_direct_io()) {
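Taken together, the relocated checks above make init_zone_info() responsible for validating the zone size, while zbd_verify_sizes() (first hunk) validates zoneskip against it. The combined rule can be summarized in a small sketch; zbd_options_valid() is an illustrative helper, not a fio function.

#include <stdbool.h>
#include <stdint.h>

/* Illustrative only: the constraints enforced by this patch. */
static bool zbd_options_valid(uint64_t zone_size, uint64_t zone_skip)
{
	if (zone_size == 0)	/* mandatory for regular block devices */
		return false;
	if (zone_size < 512)	/* must cover at least one 512-byte sector */
		return false;
	if (zone_skip && (zone_skip < zone_size || zone_skip % zone_size))
		return false;	/* zoneskip must be a multiple of zone_size */
	return true;
}

For example, with zonesize=256M, a zoneskip of 256M or 512M passes, while 384M fails the modulo test and 128M fails the minimum test.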
@@ -927,8 +957,8 @@ static void zbd_close_zone(struct thread_data *td, const struct fio_file *f,
  * a multiple of the fio block size. The caller must neither hold z->mutex
  * nor f->zbd_info->mutex. Returns with z->mutex held upon success.
  */
-struct fio_zone_info *zbd_convert_to_open_zone(struct thread_data *td,
-					       struct io_u *io_u)
+static struct fio_zone_info *zbd_convert_to_open_zone(struct thread_data *td,
+						      struct io_u *io_u)
 {
 	const uint32_t min_bs = td->o.min_bs[io_u->ddir];
 	const struct fio_file *f = io_u->file;
@@ -1213,6 +1243,65 @@ bool zbd_unaligned_write(int error_code)
 	return false;
 }
 
+/**
+ * setup_zbd_zone_mode - handle zoneskip as necessary for ZBD drives
+ * @td: FIO thread data.
+ * @io_u: FIO I/O unit.
+ *
+ * For sequential workloads, change the file offset to skip zoneskip bytes when
+ * no more I/O can be performed in the current zone.
+ * - For read workloads, zoneskip is applied when the I/O has reached the end
+ *   of the zone or the zone write position (when td->o.read_beyond_wp is
+ *   false).
+ * - For write workloads, zoneskip is applied when the zone is full.
+ * This applies only to read and write operations.
+ */
+void setup_zbd_zone_mode(struct thread_data *td, struct io_u *io_u)
+{
+	struct fio_file *f = io_u->file;
+	enum fio_ddir ddir = io_u->ddir;
+	struct fio_zone_info *z;
+	uint32_t zone_idx;
+
+	assert(td->o.zone_mode == ZONE_MODE_ZBD);
+	assert(td->o.zone_size);
+
+	/*
+	 * zone_skip is valid only for sequential workloads.
+	 */
+	if (td_random(td) || !td->o.zone_skip)
+		return;
+
+	/*
+	 * It is time to switch to a new zone if:
+	 * - zone_bytes >= zone_size bytes have already been accessed.
+	 * - The last position reached the end of the current zone.
+	 * - For reads with td->o.read_beyond_wp == false, the last position
+	 *   reached the zone write pointer.
+	 */
+	zone_idx = zbd_zone_idx(f, f->last_pos[ddir]);
+	z = &f->zbd_info->zone_info[zone_idx];
+
+	if (td->zone_bytes >= td->o.zone_size ||
+	    f->last_pos[ddir] >= (z+1)->start ||
+	    (ddir == DDIR_READ &&
+	     (!td->o.read_beyond_wp) && f->last_pos[ddir] >= z->wp)) {
+		/*
+		 * Skip zones.
+		 */
+		td->zone_bytes = 0;
+		f->file_offset += td->o.zone_size + td->o.zone_skip;
+
+		/*
+		 * Wrap from the beginning if we exceed the file size.
+		 */
+		if (f->file_offset >= f->real_file_size)
+			f->file_offset = get_start_offset(td, f);
+
+		f->last_pos[ddir] = f->file_offset;
+		td->io_skip_bytes += td->o.zone_skip;
+	}
+}
+
 /**
  * zbd_adjust_block - adjust the offset and length as necessary for ZBD drives
  * @td: FIO thread data.
@@ -1255,7 +1344,21 @@ enum io_u_action zbd_adjust_block(struct thread_data *td, struct io_u *io_u)
 
 	zbd_check_swd(f);
 
-	pthread_mutex_lock(&zb->mutex);
+	/*
+	 * Lock the io_u target zone. The zone will be unlocked if the io_u
+	 * offset is changed or when the io_u completes and zbd_put_io() is
+	 * executed. To avoid multiple jobs doing asynchronous I/Os from
+	 * deadlocking each other waiting for zone locks when building an io_u
+	 * batch, first only trylock the zone. If the zone is already locked by
+	 * another job, process the currently queued I/Os so that I/O progress
+	 * is made and zones are unlocked.
+	 */
+	if (pthread_mutex_trylock(&zb->mutex) != 0) {
+		if (!td_ioengine_flagged(td, FIO_SYNCIO))
+			io_u_quiesce(td);
+		pthread_mutex_lock(&zb->mutex);
+	}
+
 	switch (io_u->ddir) {
 	case DDIR_READ:
 		if (td->runstate == TD_VERIFYING) {
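The offset arithmetic in setup_zbd_zone_mode() can be illustrated with a hypothetical simulation. The zone and skip sizes below are made up, and the wrap goes to offset 0 where fio would use get_start_offset().

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const uint64_t zone_size = 4096;	/* assumed zone size */
	const uint64_t zone_skip = 8192;	/* a valid multiple of zone_size */
	const uint64_t file_size = 10 * zone_size;
	uint64_t offset = 0;
	int i;

	for (i = 0; i < 5; i++) {
		printf("processing zone at offset %llu\n",
		       (unsigned long long) offset);
		/* skip over the finished zone plus zone_skip bytes */
		offset += zone_size + zone_skip;
		/* wrap from the beginning if we exceed the file size */
		if (offset >= file_size)
			offset = 0;
	}
	return 0;
}

This prints offsets 0, 12288, 24576, and 36864, then wraps back to 0: with zoneskip equal to two zones, every third zone is processed.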