X-Git-Url: https://git.kernel.dk/?a=blobdiff_plain;f=zbd.c;h=0dd5a6191f8244f50e58470a0493ee1c5aae79a9;hb=6463db6c1d3a2a961008e87a86d464b596886f1a;hp=18a55ea46ef9f973c2602e135d2f81534a24de39;hpb=4803b8419f03e9d97a992329b895a0dd3d025b84;p=fio.git diff --git a/zbd.c b/zbd.c index 18a55ea4..0dd5a619 100644 --- a/zbd.c +++ b/zbd.c @@ -927,6 +927,31 @@ static void zbd_close_zone(struct thread_data *td, const struct fio_file *f, f->zbd_info->zone_info[zone_idx].open = 0; } +static void zone_lock(struct thread_data *td, struct fio_zone_info *z) +{ + /* + * Lock the io_u target zone. The zone will be unlocked if the io_u offset + * is changed or when the io_u completes and zbd_put_io() is executed. + * To avoid multiple jobs doing asynchronous I/Os from deadlocking each + * other waiting for zone locks when building an io_u batch, first + * only trylock the zone. If the zone is already locked by another job, + * process the currently queued I/Os so that I/O progress is made and + * zones are unlocked. + */ + if (pthread_mutex_trylock(&z->mutex) != 0) { + if (!td_ioengine_flagged(td, FIO_SYNCIO)) + io_u_quiesce(td); + pthread_mutex_lock(&z->mutex); + } +} + +/* Returns io_u->offset * num_open_zones / real_file_size; callers use it as an index into open_zones[]. Anything goes as long as it is not a constant. */ +static uint32_t pick_random_zone_idx(const struct fio_file *f, + const struct io_u *io_u) +{ + return io_u->offset * f->zbd_info->num_open_zones / f->real_file_size; +} + /* * Modify the offset of an I/O unit that does not refer to an open zone such * that it refers to an open zone. Close an open zone and open a new zone if @@ -951,9 +976,7 @@ static struct fio_zone_info *zbd_convert_to_open_zone(struct thread_data *td, * This statement accesses f->zbd_info->open_zones[] on purpose * without locking. 
*/ - zone_idx = f->zbd_info->open_zones[(io_u->offset - - f->file_offset) * - f->zbd_info->num_open_zones / f->io_size]; + zone_idx = f->zbd_info->open_zones[pick_random_zone_idx(f, io_u)]; } else { zone_idx = zbd_zone_idx(f, io_u->offset); } @@ -967,9 +990,11 @@ static struct fio_zone_info *zbd_convert_to_open_zone(struct thread_data *td, * has been obtained. Hence the loop. */ for (;;) { + uint32_t tmp_idx; + z = &f->zbd_info->zone_info[zone_idx]; - pthread_mutex_lock(&z->mutex); + zone_lock(td, z); pthread_mutex_lock(&f->zbd_info->mutex); if (td->o.max_open_zones == 0) goto examine_zone; @@ -980,9 +1005,35 @@ static struct fio_zone_info *zbd_convert_to_open_zone(struct thread_data *td, __func__, f->file_name); return NULL; } - open_zone_idx = (io_u->offset - f->file_offset) * - f->zbd_info->num_open_zones / f->io_size; + + /* + * List of opened zones is per-device, shared across all threads. + * Start with quasi-random candidate zone. + * Ignore zones which don't belong to thread's offset/size area. 
+ */ + open_zone_idx = pick_random_zone_idx(f, io_u); assert(open_zone_idx < f->zbd_info->num_open_zones); + tmp_idx = open_zone_idx; + for (i = 0; i < f->zbd_info->num_open_zones; i++) { + uint32_t tmpz; + + if (tmp_idx >= f->zbd_info->num_open_zones) + tmp_idx = 0; + tmpz = f->zbd_info->open_zones[tmp_idx]; + + if (is_valid_offset(f, f->zbd_info->zone_info[tmpz].start)) { + open_zone_idx = tmp_idx; + goto found_candidate_zone; + } + + tmp_idx++; + } + + /* NOTE(review): z->mutex and f->zbd_info->mutex are taken at the top of this loop iteration and no unlock is visible in this hunk before the return below - confirm both are released on this path. */ dprint(FD_ZBD, "%s(%s): no candidate zone\n", + __func__, f->file_name); + return NULL; + +found_candidate_zone: new_zone_idx = f->zbd_info->open_zones[open_zone_idx]; if (new_zone_idx == zone_idx) break; @@ -1017,7 +1068,7 @@ examine_zone: z = &f->zbd_info->zone_info[zone_idx]; } assert(is_valid_offset(f, z->start)); - pthread_mutex_lock(&z->mutex); + zone_lock(td, z); if (z->open) continue; if (zbd_open_zone(td, io_u, zone_idx)) @@ -1035,7 +1086,7 @@ examine_zone: z = &f->zbd_info->zone_info[zone_idx]; - pthread_mutex_lock(&z->mutex); + zone_lock(td, z); if (z->wp + min_bs <= (z+1)->start) goto out; pthread_mutex_lock(&f->zbd_info->mutex); @@ -1321,20 +1372,7 @@ enum io_u_action zbd_adjust_block(struct thread_data *td, struct io_u *io_u) zbd_check_swd(f); - /* - * Lock the io_u target zone. The zone will be unlocked if io_u offset - * is changed or when io_u completes and zbd_put_io() executed. - * To avoid multiple jobs doing asynchronous I/Os from deadlocking each - * other waiting for zone locks when building an io_u batch, first - * only trylock the zone. If the zone is already locked by another job, - * process the currently queued I/Os so that I/O progress is made and - * zones unlocked. - */ - if (pthread_mutex_trylock(&zb->mutex) != 0) { - if (!td_ioengine_flagged(td, FIO_SYNCIO)) - io_u_quiesce(td); - pthread_mutex_lock(&zb->mutex); - } + zone_lock(td, zb); switch (io_u->ddir) { case DDIR_READ: