X-Git-Url: https://git.kernel.dk/?p=fio.git;a=blobdiff_plain;f=zbd.c;h=2383c57dfc90aa8258e4ebcb5a2c6264de40457b;hp=9c3092ab2424dae64c6f19646ff8ae85cbda9515;hb=379e5f09a16f789f3d15ff5541a42e1e74d2b383;hpb=5fa4aff59e4eec471b2c534702f0162006845c4b diff --git a/zbd.c b/zbd.c index 9c3092ab..2383c57d 100644 --- a/zbd.c +++ b/zbd.c @@ -186,11 +186,14 @@ static bool zbd_verify_bs(void) * size of @buf. * * Returns 0 upon success and a negative error code upon failure. + * If the zone report is empty, always assume an error (device problem) and + * return -EIO. */ static int read_zone_info(int fd, uint64_t start_sector, void *buf, unsigned int bufsz) { struct blk_zone_report *hdr = buf; + int ret; if (bufsz < sizeof(*hdr)) return -EINVAL; @@ -199,7 +202,12 @@ static int read_zone_info(int fd, uint64_t start_sector, hdr->nr_zones = (bufsz - sizeof(*hdr)) / sizeof(struct blk_zone); hdr->sector = start_sector; - return ioctl(fd, BLKREPORTZONE, hdr) >= 0 ? 0 : -errno; + ret = ioctl(fd, BLKREPORTZONE, hdr); + if (ret) + return -errno; + if (!hdr->nr_zones) + return -EIO; + return 0; } /* @@ -228,12 +236,45 @@ static enum blk_zoned_model get_zbd_model(const char *file_name) char *zoned_attr_path = NULL; char *model_str = NULL; struct stat statbuf; + char *sys_devno_path = NULL; + char *part_attr_path = NULL; + char *part_str = NULL; + char sys_path[PATH_MAX]; + ssize_t sz; + char *delim = NULL; if (stat(file_name, &statbuf) < 0) goto out; - if (asprintf(&zoned_attr_path, "/sys/dev/block/%d:%d/queue/zoned", + + if (asprintf(&sys_devno_path, "/sys/dev/block/%d:%d", major(statbuf.st_rdev), minor(statbuf.st_rdev)) < 0) goto out; + + sz = readlink(sys_devno_path, sys_path, sizeof(sys_path) - 1); + if (sz < 0) + goto out; + sys_path[sz] = '\0'; + + /* + * If the device is a partition device, cut the device name in the + * canonical sysfs path to obtain the sysfs path of the holder device. + * e.g.: /sys/devices/.../sda/sda1 -> /sys/devices/.../sda + */ + if (asprintf(&part_attr_path, "/sys/dev/block/%s/partition", + sys_path) < 0) + goto out; + part_str = read_file(part_attr_path); + if (part_str && *part_str == '1') { + delim = strrchr(sys_path, '/'); + if (!delim) + goto out; + *delim = '\0'; + } + + if (asprintf(&zoned_attr_path, + "/sys/dev/block/%s/queue/zoned", sys_path) < 0) + goto out; + model_str = read_file(zoned_attr_path); if (!model_str) goto out; @@ -246,6 +287,9 @@ static enum blk_zoned_model get_zbd_model(const char *file_name) out: free(model_str); free(zoned_attr_path); + free(part_str); + free(part_attr_path); + free(sys_devno_path); return model; } @@ -382,8 +426,6 @@ static int parse_zone_info(struct thread_data *td, struct fio_file *f) p->start = z->start << 9; switch (z->cond) { case BLK_ZONE_COND_NOT_WP: - p->wp = p->start; - break; case BLK_ZONE_COND_FULL: p->wp = p->start + zone_size; break; @@ -439,7 +481,7 @@ out: * * Returns 0 upon success and a negative error code upon failure. */ -int zbd_create_zone_info(struct thread_data *td, struct fio_file *f) +static int zbd_create_zone_info(struct thread_data *td, struct fio_file *f) { enum blk_zoned_model zbd_model; int ret = 0; @@ -606,7 +648,7 @@ static int zbd_reset_range(struct thread_data *td, const struct fio_file *f, static unsigned int zbd_zone_nr(struct zoned_block_device_info *zbd_info, struct fio_zone_info *zone) { - return (uintptr_t) zone - (uintptr_t) zbd_info->zone_info; + return zone - zbd_info->zone_info; } /** @@ -620,12 +662,10 @@ static unsigned int zbd_zone_nr(struct zoned_block_device_info *zbd_info, static int zbd_reset_zone(struct thread_data *td, const struct fio_file *f, struct fio_zone_info *z) { - int ret; - dprint(FD_ZBD, "%s: resetting wp of zone %u.\n", f->file_name, zbd_zone_nr(f->zbd_info, z)); - ret = zbd_reset_range(td, f, z->start, (z+1)->start - z->start); - return ret; + + return zbd_reset_range(td, f, z->start, (z+1)->start - z->start); } /* @@ -728,34 +768,68 @@ static bool zbd_dec_and_reset_write_cnt(const struct thread_data *td, return write_cnt == 0; } -/* Check whether the value of zbd_info.sectors_with_data is correct. */ -static void check_swd(const struct thread_data *td, const struct fio_file *f) +enum swd_action { + CHECK_SWD, + SET_SWD, +}; + +/* Calculate the number of sectors with data (swd) and perform action 'a' */ +static uint64_t zbd_process_swd(const struct fio_file *f, enum swd_action a) { -#if 0 struct fio_zone_info *zb, *ze, *z; - uint64_t swd; + uint64_t swd = 0; zb = &f->zbd_info->zone_info[zbd_zone_idx(f, f->file_offset)]; ze = &f->zbd_info->zone_info[zbd_zone_idx(f, f->file_offset + f->io_size)]; - swd = 0; for (z = zb; z < ze; z++) { pthread_mutex_lock(&z->mutex); swd += z->wp - z->start; } pthread_mutex_lock(&f->zbd_info->mutex); - assert(f->zbd_info->sectors_with_data == swd); + switch (a) { + case CHECK_SWD: + assert(f->zbd_info->sectors_with_data == swd); + break; + case SET_SWD: + f->zbd_info->sectors_with_data = swd; + break; + } pthread_mutex_unlock(&f->zbd_info->mutex); for (z = zb; z < ze; z++) pthread_mutex_unlock(&z->mutex); -#endif + + return swd; +} + +/* + * The swd check is useful for debugging but takes too much time to leave + * it enabled all the time. Hence it is disabled by default. + */ +static const bool enable_check_swd = false; + +/* Check whether the value of zbd_info.sectors_with_data is correct. */ +static void zbd_check_swd(const struct fio_file *f) +{ + if (!enable_check_swd) + return; + + zbd_process_swd(f, CHECK_SWD); +} + +static void zbd_init_swd(struct fio_file *f) +{ + uint64_t swd; + + swd = zbd_process_swd(f, SET_SWD); + dprint(FD_ZBD, "%s(%s): swd = %" PRIu64 "\n", __func__, f->file_name, + swd); } void zbd_file_reset(struct thread_data *td, struct fio_file *f) { - struct fio_zone_info *zb, *ze, *z; + struct fio_zone_info *zb, *ze; uint32_t zone_idx_e; - uint64_t swd = 0; if (!f->zbd_info) return; @@ -763,17 +837,7 @@ void zbd_file_reset(struct thread_data *td, struct fio_file *f) zb = &f->zbd_info->zone_info[zbd_zone_idx(f, f->file_offset)]; zone_idx_e = zbd_zone_idx(f, f->file_offset + f->io_size); ze = &f->zbd_info->zone_info[zone_idx_e]; - for (z = zb ; z < ze; z++) { - pthread_mutex_lock(&z->mutex); - swd += z->wp - z->start; - } - pthread_mutex_lock(&f->zbd_info->mutex); - f->zbd_info->sectors_with_data = swd; - pthread_mutex_unlock(&f->zbd_info->mutex); - for (z = zb ; z < ze; z++) - pthread_mutex_unlock(&z->mutex); - dprint(FD_ZBD, "%s(%s): swd = %llu\n", __func__, f->file_name, - (unsigned long long) swd); + zbd_init_swd(f); /* * If data verification is enabled reset the affected zones before * writing any data to avoid that a zone reset has to be issued while @@ -869,8 +933,8 @@ static void zbd_close_zone(struct thread_data *td, const struct fio_file *f, * a multiple of the fio block size. The caller must neither hold z->mutex * nor f->zbd_info->mutex. Returns with z->mutex held upon success. */ -struct fio_zone_info *zbd_convert_to_open_zone(struct thread_data *td, - struct io_u *io_u) +static struct fio_zone_info *zbd_convert_to_open_zone(struct thread_data *td, + struct io_u *io_u) { const uint32_t min_bs = td->o.min_bs[io_u->ddir]; const struct fio_file *f = io_u->file; @@ -1053,37 +1117,44 @@ zbd_find_zone(struct thread_data *td, struct io_u *io_u, return NULL; } - /** - * zbd_post_submit - update the write pointer and unlock the zone lock + * zbd_queue_io - update the write pointer of a sequential zone * @io_u: I/O unit - * @success: Whether or not the I/O unit has been executed successfully + * @success: Whether or not the I/O unit has been queued successfully + * @q: queueing status (busy, completed or queued). * - * For write and trim operations, update the write pointer of all affected - * zones. + * For write and trim operations, update the write pointer of the I/O unit + * target zone. */ -static void zbd_post_submit(const struct io_u *io_u, bool success) +static void zbd_queue_io(struct io_u *io_u, int q, bool success) { - struct zoned_block_device_info *zbd_info; + const struct fio_file *f = io_u->file; + struct zoned_block_device_info *zbd_info = f->zbd_info; struct fio_zone_info *z; uint32_t zone_idx; - uint64_t end, zone_end; + uint64_t zone_end; - zbd_info = io_u->file->zbd_info; if (!zbd_info) return; - zone_idx = zbd_zone_idx(io_u->file, io_u->offset); - end = io_u->offset + io_u->buflen; - z = &zbd_info->zone_info[zone_idx]; + zone_idx = zbd_zone_idx(f, io_u->offset); assert(zone_idx < zbd_info->nr_zones); + z = &zbd_info->zone_info[zone_idx]; + if (z->type != BLK_ZONE_TYPE_SEQWRITE_REQ) return; + if (!success) goto unlock; + + dprint(FD_ZBD, + "%s: queued I/O (%lld, %llu) for zone %u\n", + f->file_name, io_u->offset, io_u->buflen, zone_idx); + switch (io_u->ddir) { case DDIR_WRITE: - zone_end = min(end, (z + 1)->start); + zone_end = min((uint64_t)(io_u->offset + io_u->buflen), + (z + 1)->start); pthread_mutex_lock(&zbd_info->mutex); /* * z->wp > zone_end means that one or more I/O errors @@ -1100,8 +1171,42 @@ static void zbd_post_submit(const struct io_u *io_u, bool success) default: break; } + unlock: - pthread_mutex_unlock(&z->mutex); + if (!success || q != FIO_Q_QUEUED) { + /* BUSY or COMPLETED: unlock the zone */ + pthread_mutex_unlock(&z->mutex); + io_u->zbd_put_io = NULL; + } +} + +/** + * zbd_put_io - Unlock an I/O unit target zone lock + * @io_u: I/O unit + */ +static void zbd_put_io(const struct io_u *io_u) +{ + const struct fio_file *f = io_u->file; + struct zoned_block_device_info *zbd_info = f->zbd_info; + struct fio_zone_info *z; + uint32_t zone_idx; + + if (!zbd_info) + return; + + zone_idx = zbd_zone_idx(f, io_u->offset); + assert(zone_idx < zbd_info->nr_zones); + z = &zbd_info->zone_info[zone_idx]; + + if (z->type != BLK_ZONE_TYPE_SEQWRITE_REQ) + return; + + dprint(FD_ZBD, + "%s: terminate I/O (%lld, %llu) for zone %u\n", + f->file_name, io_u->offset, io_u->buflen, zone_idx); + + assert(pthread_mutex_unlock(&z->mutex) == 0); + zbd_check_swd(f); } bool zbd_unaligned_write(int error_code) @@ -1154,7 +1259,23 @@ enum io_u_action zbd_adjust_block(struct thread_data *td, struct io_u *io_u) io_u->ddir == DDIR_READ && td->o.read_beyond_wp) return io_u_accept; - pthread_mutex_lock(&zb->mutex); + zbd_check_swd(f); + + /* + * Lock the io_u target zone. The zone will be unlocked if io_u offset + * is changed or when io_u completes and zbd_put_io() executed. + * To avoid multiple jobs doing asynchronous I/Os from deadlocking each + * other waiting for zone locks when building an io_u batch, first + * only trylock the zone. If the zone is already locked by another job, + * process the currently queued I/Os so that I/O progress is made and + * zones unlocked. + */ + if (pthread_mutex_trylock(&zb->mutex) != 0) { + if (!td_ioengine_flagged(td, FIO_SYNCIO)) + io_u_quiesce(td); + pthread_mutex_lock(&zb->mutex); + } + switch (io_u->ddir) { case DDIR_READ: if (td->runstate == TD_VERIFYING) { @@ -1227,7 +1348,6 @@ enum io_u_action zbd_adjust_block(struct thread_data *td, struct io_u *io_u) } /* Check whether the zone reset threshold has been exceeded */ if (td->o.zrf.u.f) { - check_swd(td, f); if (f->zbd_info->sectors_with_data >= f->io_size * td->o.zrt.u.f && zbd_dec_and_reset_write_cnt(td, f)) { @@ -1248,7 +1368,6 @@ enum io_u_action zbd_adjust_block(struct thread_data *td, struct io_u *io_u) zb->reset_zone = 0; if (zbd_reset_zone(td, f, zb) < 0) goto eof; - check_swd(td, f); } /* Make writes occur at the write pointer */ assert(!zbd_zone_full(f, zb, min_bs)); @@ -1294,8 +1413,10 @@ enum io_u_action zbd_adjust_block(struct thread_data *td, struct io_u *io_u) accept: assert(zb); assert(zb->cond != BLK_ZONE_COND_OFFLINE); - assert(!io_u->post_submit); - io_u->post_submit = zbd_post_submit; + assert(!io_u->zbd_queue_io); + assert(!io_u->zbd_put_io); + io_u->zbd_queue_io = zbd_queue_io; + io_u->zbd_put_io = zbd_put_io; return io_u_accept; eof: