fio: fix interaction between offset/size limited threads and "max_open_zones"

[fio.git] / zbd.c
diff --git a/zbd.c b/zbd.c

index 18a55ea46ef9f973c2602e135d2f81534a24de39..0dd5a6191f8244f50e58470a0493ee1c5aae79a9 100644 (file)
--- a/zbd.c
+++ b/zbd.c
@@ -927,6 +927,31 @@ static void zbd_close_zone(struct thread_data *td, const struct fio_file *f,
         f->zbd_info->zone_info[zone_idx].open = 0;
  }
  
+static void zone_lock(struct thread_data *td, struct fio_zone_info *z)
+{
+       /*
+        * Lock zone z for job td. The zone will be unlocked if the io_u offset
+        * is changed or when the io_u completes and zbd_put_io() is executed.
+        * To keep multiple jobs doing asynchronous I/O from deadlocking each
+        * other on zone locks while building an io_u batch, only trylock the
+        * zone first. If that fails, quiesce the queued I/Os (skipped for
+        * FIO_SYNCIO engines) so that progress is made and zones get
+        * unlocked, then block until the zone mutex is acquired.
+        */
+       if (pthread_mutex_trylock(&z->mutex) != 0) {
+               if (!td_ioengine_flagged(td, FIO_SYNCIO))
+                       io_u_quiesce(td);
+               pthread_mutex_lock(&z->mutex);
+       }
+}
+
+/* Offset-derived index; anything goes as long as it is not a constant. */
+static uint32_t pick_random_zone_idx(const struct fio_file *f,
+                                    const struct io_u *io_u)
+{
+       return io_u->offset * f->zbd_info->num_open_zones / f->real_file_size;
+}
+
  /*
   * Modify the offset of an I/O unit that does not refer to an open zone such
   * that it refers to an open zone. Close an open zone and open a new zone if
@@ -951,9 +976,7 @@ static struct fio_zone_info *zbd_convert_to_open_zone(struct thread_data *td,
                  * This statement accesses f->zbd_info->open_zones[] on purpose
                  * without locking.
                  */
-               zone_idx = f->zbd_info->open_zones[(io_u->offset -
-                                                   f->file_offset) *
-                               f->zbd_info->num_open_zones / f->io_size];
+               zone_idx = f->zbd_info->open_zones[pick_random_zone_idx(f, io_u)];
         } else {
                 zone_idx = zbd_zone_idx(f, io_u->offset);
         }
@@ -967,9 +990,11 @@ static struct fio_zone_info *zbd_convert_to_open_zone(struct thread_data *td,
          * has been obtained. Hence the loop.
          */
         for (;;) {
+               uint32_t tmp_idx;
+
                 z = &f->zbd_info->zone_info[zone_idx];
  
-               pthread_mutex_lock(&z->mutex);
+               zone_lock(td, z);
                 pthread_mutex_lock(&f->zbd_info->mutex);
                 if (td->o.max_open_zones == 0)
                         goto examine_zone;
@@ -980,9 +1005,35 @@ static struct fio_zone_info *zbd_convert_to_open_zone(struct thread_data *td,
                                __func__, f->file_name);
                         return NULL;
                 }
-               open_zone_idx = (io_u->offset - f->file_offset) *
-                       f->zbd_info->num_open_zones / f->io_size;
+
+               /*
+                * The list of open zones is per-device and shared by all threads.
+                * Start with a quasi-random candidate zone and skip zones that
+                * do not belong to this thread's offset/size area.
+                */
+               open_zone_idx = pick_random_zone_idx(f, io_u);
                 assert(open_zone_idx < f->zbd_info->num_open_zones);
+               tmp_idx = open_zone_idx;
+               for (i = 0; i < f->zbd_info->num_open_zones; i++) {
+                       uint32_t tmpz;
+
+                       if (tmp_idx >= f->zbd_info->num_open_zones)
+                               tmp_idx = 0;
+                       tmpz = f->zbd_info->open_zones[tmp_idx];
+
+                       if (is_valid_offset(f, f->zbd_info->zone_info[tmpz].start)) {
+                               open_zone_idx = tmp_idx;
+                               goto found_candidate_zone;
+                       }
+
+                       tmp_idx++;
+               }
+
+               dprint(FD_ZBD, "%s(%s): no candidate zone\n",
+                       __func__, f->file_name);
+               return NULL;
+
+found_candidate_zone:
                 new_zone_idx = f->zbd_info->open_zones[open_zone_idx];
                 if (new_zone_idx == zone_idx)
                         break;
@@ -1017,7 +1068,7 @@ examine_zone:
                         z = &f->zbd_info->zone_info[zone_idx];
                 }
                 assert(is_valid_offset(f, z->start));
-               pthread_mutex_lock(&z->mutex);
+               zone_lock(td, z);
                 if (z->open)
                         continue;
                 if (zbd_open_zone(td, io_u, zone_idx))
@@ -1035,7 +1086,7 @@ examine_zone:
  
                 z = &f->zbd_info->zone_info[zone_idx];
  
-               pthread_mutex_lock(&z->mutex);
+               zone_lock(td, z);
                 if (z->wp + min_bs <= (z+1)->start)
                         goto out;
                 pthread_mutex_lock(&f->zbd_info->mutex);
@@ -1321,20 +1372,7 @@ enum io_u_action zbd_adjust_block(struct thread_data *td, struct io_u *io_u)
  
         zbd_check_swd(f);
  
-       /*
-        * Lock the io_u target zone. The zone will be unlocked if io_u offset
-        * is changed or when io_u completes and zbd_put_io() executed.
-        * To avoid multiple jobs doing asynchronous I/Os from deadlocking each
-        * other waiting for zone locks when building an io_u batch, first
-        * only trylock the zone. If the zone is already locked by another job,
-        * process the currently queued I/Os so that I/O progress is made and
-        * zones unlocked.
-        */
-       if (pthread_mutex_trylock(&zb->mutex) != 0) {
-               if (!td_ioengine_flagged(td, FIO_SYNCIO))
-                       io_u_quiesce(td);
-               pthread_mutex_lock(&zb->mutex);
-       }
+       zone_lock(td, zb);
  
         switch (io_u->ddir) {
         case DDIR_READ: