X-Git-Url: https://git.kernel.dk/?p=fio.git;a=blobdiff_plain;f=zbd.c;h=2da742b7212280cb5e2ab562836bd058bb156fbe;hp=aa08b81115342342d383e3705aa2c25a17b9f277;hb=6f0c608564c3b5cc33330a22d1b347376e2cf1cc;hpb=480885500d2d6b29a4d0c822ceaf37d5a6eb3abd diff --git a/zbd.c b/zbd.c index aa08b811..2da742b7 100644 --- a/zbd.c +++ b/zbd.c @@ -228,12 +228,45 @@ static enum blk_zoned_model get_zbd_model(const char *file_name) char *zoned_attr_path = NULL; char *model_str = NULL; struct stat statbuf; + char *sys_devno_path = NULL; + char *part_attr_path = NULL; + char *part_str = NULL; + char sys_path[PATH_MAX]; + ssize_t sz; + char *delim = NULL; if (stat(file_name, &statbuf) < 0) goto out; - if (asprintf(&zoned_attr_path, "/sys/dev/block/%d:%d/queue/zoned", + + if (asprintf(&sys_devno_path, "/sys/dev/block/%d:%d", major(statbuf.st_rdev), minor(statbuf.st_rdev)) < 0) goto out; + + sz = readlink(sys_devno_path, sys_path, sizeof(sys_path) - 1); + if (sz < 0) + goto out; + sys_path[sz] = '\0'; + + /* + * If the device is a partition device, cut the device name in the + * canonical sysfs path to obtain the sysfs path of the holder device. + * e.g.: /sys/devices/.../sda/sda1 -> /sys/devices/.../sda + */ + if (asprintf(&part_attr_path, "/sys/dev/block/%s/partition", + sys_path) < 0) + goto out; + part_str = read_file(part_attr_path); + if (part_str && *part_str == '1') { + delim = strrchr(sys_path, '/'); + if (!delim) + goto out; + *delim = '\0'; + } + + if (asprintf(&zoned_attr_path, + "/sys/dev/block/%s/queue/zoned", sys_path) < 0) + goto out; + model_str = read_file(zoned_attr_path); if (!model_str) goto out; @@ -246,6 +279,9 @@ static enum blk_zoned_model get_zbd_model(const char *file_name) out: free(model_str); free(zoned_attr_path); + free(part_str); + free(part_attr_path); + free(sys_devno_path); return model; } @@ -726,29 +762,76 @@ static bool zbd_dec_and_reset_write_cnt(const struct thread_data *td, return write_cnt == 0; } -void zbd_file_reset(struct thread_data *td, struct fio_file *f) +enum swd_action { + CHECK_SWD, + SET_SWD, +}; + +/* Calculate the number of sectors with data (swd) and perform action 'a' */ +static uint64_t zbd_process_swd(const struct fio_file *f, enum swd_action a) { struct fio_zone_info *zb, *ze, *z; - uint32_t zone_idx_e; uint64_t swd = 0; - if (!f->zbd_info) - return; - zb = &f->zbd_info->zone_info[zbd_zone_idx(f, f->file_offset)]; - zone_idx_e = zbd_zone_idx(f, f->file_offset + f->io_size); - ze = &f->zbd_info->zone_info[zone_idx_e]; - for (z = zb ; z < ze; z++) { + ze = &f->zbd_info->zone_info[zbd_zone_idx(f, f->file_offset + + f->io_size)]; + for (z = zb; z < ze; z++) { pthread_mutex_lock(&z->mutex); swd += z->wp - z->start; } pthread_mutex_lock(&f->zbd_info->mutex); - f->zbd_info->sectors_with_data = swd; + switch (a) { + case CHECK_SWD: + assert(f->zbd_info->sectors_with_data == swd); + break; + case SET_SWD: + f->zbd_info->sectors_with_data = swd; + break; + } pthread_mutex_unlock(&f->zbd_info->mutex); - for (z = zb ; z < ze; z++) + for (z = zb; z < ze; z++) pthread_mutex_unlock(&z->mutex); - dprint(FD_ZBD, "%s(%s): swd = %llu\n", __func__, f->file_name, - (unsigned long long) swd); + + return swd; +} + +/* + * The swd check is useful for debugging but takes too much time to leave + * it enabled all the time. Hence it is disabled by default. + */ +static const bool enable_check_swd = false; + +/* Check whether the value of zbd_info.sectors_with_data is correct. 
*/ +static void zbd_check_swd(const struct fio_file *f) +{ + if (!enable_check_swd) + return; + + zbd_process_swd(f, CHECK_SWD); +} + +static void zbd_init_swd(struct fio_file *f) +{ + uint64_t swd; + + swd = zbd_process_swd(f, SET_SWD); + dprint(FD_ZBD, "%s(%s): swd = %" PRIu64 "\n", __func__, f->file_name, + swd); +} + +void zbd_file_reset(struct thread_data *td, struct fio_file *f) +{ + struct fio_zone_info *zb, *ze; + uint32_t zone_idx_e; + + if (!f->zbd_info) + return; + + zb = &f->zbd_info->zone_info[zbd_zone_idx(f, f->file_offset)]; + zone_idx_e = zbd_zone_idx(f, f->file_offset + f->io_size); + ze = &f->zbd_info->zone_info[zone_idx_e]; + zbd_init_swd(f); /* * If data verification is enabled reset the affected zones before * writing any data to avoid that a zone reset has to be issued while @@ -1028,37 +1111,44 @@ zbd_find_zone(struct thread_data *td, struct io_u *io_u, return NULL; } - /** - * zbd_post_submit - update the write pointer and unlock the zone lock + * zbd_queue_io - update the write pointer of a sequential zone * @io_u: I/O unit - * @success: Whether or not the I/O unit has been executed successfully + * @success: Whether or not the I/O unit has been queued successfully + * @q: queueing status (busy, completed or queued). * - * For write and trim operations, update the write pointer of all affected - * zones. + * For write and trim operations, update the write pointer of the I/O unit + * target zone. */ -static void zbd_post_submit(const struct io_u *io_u, bool success) +static void zbd_queue_io(struct io_u *io_u, int q, bool success) { - struct zoned_block_device_info *zbd_info; + const struct fio_file *f = io_u->file; + struct zoned_block_device_info *zbd_info = f->zbd_info; struct fio_zone_info *z; uint32_t zone_idx; - uint64_t end, zone_end; + uint64_t zone_end; - zbd_info = io_u->file->zbd_info; if (!zbd_info) return; - zone_idx = zbd_zone_idx(io_u->file, io_u->offset); - end = io_u->offset + io_u->buflen; - z = &zbd_info->zone_info[zone_idx]; + zone_idx = zbd_zone_idx(f, io_u->offset); assert(zone_idx < zbd_info->nr_zones); + z = &zbd_info->zone_info[zone_idx]; + if (z->type != BLK_ZONE_TYPE_SEQWRITE_REQ) return; + if (!success) goto unlock; + + dprint(FD_ZBD, + "%s: queued I/O (%lld, %llu) for zone %u\n", + f->file_name, io_u->offset, io_u->buflen, zone_idx); + switch (io_u->ddir) { case DDIR_WRITE: - zone_end = min(end, (z + 1)->start); + zone_end = min((uint64_t)(io_u->offset + io_u->buflen), + (z + 1)->start); pthread_mutex_lock(&zbd_info->mutex); /* * z->wp > zone_end means that one or more I/O errors @@ -1075,8 +1165,42 @@ static void zbd_post_submit(const struct io_u *io_u, bool success) default: break; } + unlock: - pthread_mutex_unlock(&z->mutex); + if (!success || q != FIO_Q_QUEUED) { + /* BUSY or COMPLETED: unlock the zone */ + pthread_mutex_unlock(&z->mutex); + io_u->zbd_put_io = NULL; + } +} + +/** + * zbd_put_io - Unlock an I/O unit target zone lock + * @io_u: I/O unit + */ +static void zbd_put_io(const struct io_u *io_u) +{ + const struct fio_file *f = io_u->file; + struct zoned_block_device_info *zbd_info = f->zbd_info; + struct fio_zone_info *z; + uint32_t zone_idx; + + if (!zbd_info) + return; + + zone_idx = zbd_zone_idx(f, io_u->offset); + assert(zone_idx < zbd_info->nr_zones); + z = &zbd_info->zone_info[zone_idx]; + + if (z->type != BLK_ZONE_TYPE_SEQWRITE_REQ) + return; + + dprint(FD_ZBD, + "%s: terminate I/O (%lld, %llu) for zone %u\n", + f->file_name, io_u->offset, io_u->buflen, zone_idx); + + assert(pthread_mutex_unlock(&z->mutex) == 0); + 
+	zbd_check_swd(f);
 }
 
 bool zbd_unaligned_write(int error_code)
@@ -1129,7 +1253,23 @@ enum io_u_action zbd_adjust_block(struct thread_data *td, struct io_u *io_u)
 	    io_u->ddir == DDIR_READ && td->o.read_beyond_wp)
 		return io_u_accept;
 
-	pthread_mutex_lock(&zb->mutex);
+	zbd_check_swd(f);
+
+	/*
+	 * Lock the io_u target zone. The zone is unlocked if the io_u offset
+	 * changes or when the io_u completes and zbd_put_io() is executed.
+	 * To prevent multiple jobs doing asynchronous I/Os from deadlocking
+	 * each other while waiting for zone locks when building an io_u
+	 * batch, first only trylock the zone. If the zone is already locked
+	 * by another job, process the currently queued I/Os so that I/O
+	 * progress is made and zones get unlocked.
+	 */
+	if (pthread_mutex_trylock(&zb->mutex) != 0) {
+		if (!td_ioengine_flagged(td, FIO_SYNCIO))
+			io_u_quiesce(td);
+		pthread_mutex_lock(&zb->mutex);
+	}
+
 	switch (io_u->ddir) {
 	case DDIR_READ:
 		if (td->runstate == TD_VERIFYING) {
@@ -1267,8 +1407,10 @@ enum io_u_action zbd_adjust_block(struct thread_data *td, struct io_u *io_u)
 accept:
 	assert(zb);
 	assert(zb->cond != BLK_ZONE_COND_OFFLINE);
-	assert(!io_u->post_submit);
-	io_u->post_submit = zbd_post_submit;
+	assert(!io_u->zbd_queue_io);
+	assert(!io_u->zbd_put_io);
+	io_u->zbd_queue_io = zbd_queue_io;
+	io_u->zbd_put_io = zbd_put_io;
 	return io_u_accept;
 
 eof:
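
The sysfs walk that the first hunk adds to get_zbd_model() can be exercised outside of fio. The sketch below is a minimal standalone program, not fio code: it resolves /sys/dev/block/<major>:<minor> through readlink(), strips the trailing partition component when a "partition" attribute is present (fio reads the attribute's value; testing only for its existence is a simplification assumed here), and then reads queue/zoned from the holder device. Function and buffer names are illustrative.

/*
 * Standalone sketch (not fio code): look up a block device's zoned model the
 * same way the patched get_zbd_model() does, via /sys/dev/block/<maj>:<min>.
 * Error handling is abbreviated and the partition check is simplified to an
 * existence test of the "partition" attribute.
 */
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/sysmacros.h>
#include <unistd.h>

/* Returns a malloc()ed string such as "none", "host-aware" or "host-managed". */
static char *zoned_model(const char *bdev_path)
{
	char sys_path[PATH_MAX], attr[PATH_MAX + 64], buf[64];
	struct stat st;
	ssize_t sz;
	char *delim;
	FILE *fp;

	if (stat(bdev_path, &st) < 0 || !S_ISBLK(st.st_mode))
		return NULL;

	/* /sys/dev/block/<maj>:<min> is a symlink into /sys/devices/... */
	snprintf(attr, sizeof(attr), "/sys/dev/block/%u:%u",
		 major(st.st_rdev), minor(st.st_rdev));
	sz = readlink(attr, sys_path, sizeof(sys_path) - 1);
	if (sz < 0)
		return NULL;
	sys_path[sz] = '\0';

	/* Partition? Cut the last path component to reach the holder disk. */
	snprintf(attr, sizeof(attr), "/sys/dev/block/%s/partition", sys_path);
	if (access(attr, F_OK) == 0) {
		delim = strrchr(sys_path, '/');
		if (delim)
			*delim = '\0';
	}

	snprintf(attr, sizeof(attr), "/sys/dev/block/%s/queue/zoned", sys_path);
	fp = fopen(attr, "r");
	if (!fp)
		return NULL;
	if (!fgets(buf, sizeof(buf), fp))
		buf[0] = '\0';
	fclose(fp);
	buf[strcspn(buf, "\n")] = '\0';
	return strdup(buf);
}

int main(int argc, char **argv)
{
	char *model = argc > 1 ? zoned_model(argv[1]) : NULL;

	printf("zoned model: %s\n", model && *model ? model : "unknown");
	free(model);
	return 0;
}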
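The core of the patch is the split of zbd_post_submit() into zbd_queue_io() and zbd_put_io(): the zone lock taken while preparing an I/O unit is dropped at queue time only when the request never reaches the QUEUED state (BUSY, or completed synchronously); otherwise the release is deferred to a put callback that runs at completion. The toy program below is not fio code; it models that hand-off with a plain pthread mutex, and the Q_* constants and struct names are made up for the example.

/*
 * Toy model (not fio code) of the two-callback lock hand-off introduced by
 * this patch: the zone mutex taken when an I/O is prepared is released either
 * at queue time, if the I/O never reaches the QUEUED state, or by a separate
 * "put" callback once the asynchronous I/O completes.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

enum q_status { Q_BUSY, Q_COMPLETED, Q_QUEUED };

struct zone {
	pthread_mutex_t mutex;
	unsigned long long wp;		/* write pointer */
};

struct io {
	struct zone *z;
	unsigned long long len;
	void (*put)(struct io *);	/* set only while the zone stays locked */
};

static void io_put(struct io *io)
{
	/* Asynchronous completion: finally drop the zone lock. */
	pthread_mutex_unlock(&io->z->mutex);
	io->put = NULL;
}

static void io_queue(struct io *io, enum q_status q, bool success)
{
	if (success)
		io->z->wp += io->len;	/* advance the write pointer */

	if (!success || q != Q_QUEUED) {
		/* BUSY or COMPLETED: nothing outstanding, unlock now. */
		pthread_mutex_unlock(&io->z->mutex);
		io->put = NULL;
	} else {
		io->put = io_put;	/* defer the unlock to completion */
	}
}

int main(void)
{
	struct zone z = { .mutex = PTHREAD_MUTEX_INITIALIZER };
	struct io io = { .z = &z, .len = 4096 };

	pthread_mutex_lock(&z.mutex);	/* prepare/adjust: lock the zone */
	io_queue(&io, Q_QUEUED, true);	/* queued asynchronously */
	if (io.put)
		io.put(&io);		/* completion path releases the lock */

	printf("wp = %llu\n", z.wp);
	return 0;
}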
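The trylock added to zbd_adjust_block() exists because a zone lock may now be held across an in-flight asynchronous I/O: blocking directly could deadlock jobs waiting on each other's zones while building an io_u batch, so the contended path first reaps queued I/O (which runs the put callbacks and releases zone locks) and only then blocks. A small compilable sketch of that idea follows; drain_queued_io() is a hypothetical stand-in for fio's io_u_quiesce() and is not a real API.

/*
 * Standalone sketch (not fio code) of the deadlock-avoidance idea behind the
 * pthread_mutex_trylock() call added to zbd_adjust_block(): if the zone lock
 * is contended, first drain work that may be pinning other zone locks, then
 * block on the lock.
 */
#include <pthread.h>
#include <stdio.h>

static void drain_queued_io(void)
{
	/* Placeholder: reap in-flight async I/O so completions can run. */
	puts("draining queued I/O before blocking on the zone lock");
}

static void lock_zone(pthread_mutex_t *zone_mutex, int async_engine)
{
	if (pthread_mutex_trylock(zone_mutex) != 0) {
		if (async_engine)
			drain_queued_io();
		pthread_mutex_lock(zone_mutex);
	}
}

int main(void)
{
	pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;

	lock_zone(&m, 1);	/* uncontended here: trylock succeeds */
	pthread_mutex_unlock(&m);
	return 0;
}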