Merge branch 'master' of https://github.com/celestinechen/fio
[fio.git] / filesetup.c
index b51ab35ce08b59ccbff3d2d7f2da2692c69a85aa..2d277a6428a3b19dee365262405b30850d90a669 100644 (file)
@@ -5,8 +5,6 @@
 #include <dirent.h>
 #include <libgen.h>
 #include <sys/stat.h>
-#include <sys/mman.h>
-#include <sys/types.h>
 
 #include "fio.h"
 #include "smalloc.h"
 #include "os/os.h"
 #include "hash.h"
 #include "lib/axmap.h"
+#include "rwlock.h"
+#include "zbd.h"
 
 #ifdef CONFIG_LINUX_FALLOCATE
 #include <linux/falloc.h>
 #endif
 
-static int root_warn;
-
 static FLIST_HEAD(filename_list);
 
 /*
@@ -38,7 +36,7 @@ static inline void clear_error(struct thread_data *td)
        td->verror[0] = '\0';
 }
 
-static inline int native_fallocate(struct thread_data *td, struct fio_file *f)
+static int native_fallocate(struct thread_data *td, struct fio_file *f)
 {
        bool success;
 
@@ -49,32 +47,29 @@ static inline int native_fallocate(struct thread_data *td, struct fio_file *f)
                        !success ? "un": "");
 
        if (success)
-               return 0;
+               return false;
 
        if (errno == ENOSYS)
                dprint(FD_FILE, "native fallocate is not implemented\n");
 
-       return -1;
+       return true;
 }
 
 static void fallocate_file(struct thread_data *td, struct fio_file *f)
 {
-       int r;
-
        if (td->o.fill_device)
                return;
 
        switch (td->o.fallocate_mode) {
        case FIO_FALLOCATE_NATIVE:
-               r = native_fallocate(td, f);
-               if (r != 0 && errno != ENOSYS)
-                       log_err("fio: native_fallocate call failed: %s\n",
-                                       strerror(errno));
+               native_fallocate(td, f);
                break;
        case FIO_FALLOCATE_NONE:
                break;
 #ifdef CONFIG_POSIX_FALLOCATE
-       case FIO_FALLOCATE_POSIX:
+       case FIO_FALLOCATE_POSIX: {
+               int r;
+
                dprint(FD_FILE, "posix_fallocate file %s size %llu\n",
                                 f->file_name,
                                 (unsigned long long) f->real_file_size);
@@ -83,9 +78,12 @@ static void fallocate_file(struct thread_data *td, struct fio_file *f)
                if (r > 0)
                        log_err("fio: posix_fallocate fails: %s\n", strerror(r));
                break;
+               }
 #endif /* CONFIG_POSIX_FALLOCATE */
 #ifdef CONFIG_LINUX_FALLOCATE
-       case FIO_FALLOCATE_KEEP_SIZE:
+       case FIO_FALLOCATE_KEEP_SIZE: {
+               int r;
+
                dprint(FD_FILE, "fallocate(FALLOC_FL_KEEP_SIZE) "
                                "file %s size %llu\n", f->file_name,
                                (unsigned long long) f->real_file_size);
@@ -95,7 +93,20 @@ static void fallocate_file(struct thread_data *td, struct fio_file *f)
                        td_verror(td, errno, "fallocate");
 
                break;
+               }
 #endif /* CONFIG_LINUX_FALLOCATE */
+       case FIO_FALLOCATE_TRUNCATE: {
+               int r;
+
+               dprint(FD_FILE, "ftruncate file %s size %llu\n",
+                               f->file_name,
+                               (unsigned long long) f->real_file_size);
+               r = ftruncate(f->fd, f->real_file_size);
+               if (r != 0)
+                       td_verror(td, errno, "ftruncate");
+
+               break;
+       }
        default:
                log_err("fio: unknown fallocate mode: %d\n", td->o.fallocate_mode);
                assert(0);
@@ -109,7 +120,7 @@ static int extend_file(struct thread_data *td, struct fio_file *f)
 {
        int new_layout = 0, unlink_file = 0, flags;
        unsigned long long left;
-       unsigned int bs;
+       unsigned long long bs;
        char *b = NULL;
 
        if (read_only) {
@@ -215,18 +226,22 @@ static int extend_file(struct thread_data *td, struct fio_file *f)
                        if (r < 0) {
                                int __e = errno;
 
-                               if (__e == ENOSPC) {
+                               if (__e == ENOSPC || __e == EDQUOT) {
+                                       const char *__e_name;
                                        if (td->o.fill_device)
                                                break;
-                                       log_info("fio: ENOSPC on laying out "
-                                                "file, stopping\n");
-                                       break;
+                                       if (__e == ENOSPC)
+                                               __e_name = "ENOSPC";
+                                       else
+                                               __e_name = "EDQUOT";
+                                       log_info("fio: %s on laying out "
+                                                "file, stopping\n", __e_name);
                                }
                                td_verror(td, errno, "write");
                        } else
                                td_verror(td, EIO, "write");
 
-                       break;
+                       goto err;
                }
        }
 
@@ -258,24 +273,25 @@ err:
        return 1;
 }
 
-static int pre_read_file(struct thread_data *td, struct fio_file *f)
+static bool pre_read_file(struct thread_data *td, struct fio_file *f)
 {
-       int ret = 0, r, did_open = 0, old_runstate;
+       int r, did_open = 0, old_runstate;
        unsigned long long left;
-       unsigned int bs;
+       unsigned long long bs;
+       bool ret = true;
        char *b;
 
        if (td_ioengine_flagged(td, FIO_PIPEIO) ||
            td_ioengine_flagged(td, FIO_NOIO))
-               return 0;
+               return true;
 
        if (f->filetype == FIO_TYPE_CHAR)
-               return 0;
+               return true;
 
        if (!fio_file_open(f)) {
                if (td->io_ops->open_file(td, f)) {
                        log_err("fio: cannot pre-read, failed to open file\n");
-                       return 1;
+                       return false;
                }
                did_open = 1;
        }
@@ -287,18 +303,17 @@ static int pre_read_file(struct thread_data *td, struct fio_file *f)
        if (bs > left)
                bs = left;
 
-       b = malloc(bs);
+       b = calloc(1, bs);
        if (!b) {
                td_verror(td, errno, "malloc");
-               ret = 1;
+               ret = false;
                goto error;
        }
-       memset(b, 0, bs);
 
        if (lseek(f->fd, f->file_offset, SEEK_SET) < 0) {
                td_verror(td, errno, "lseek");
                log_err("fio: failed to lseek pre-read file\n");
-               ret = 1;
+               ret = false;
                goto error;
        }
 
@@ -327,11 +342,100 @@ error:
        return ret;
 }
 
+/*
+ * Generic function to prepopulate a regular file with data.
+ * Useful if you want to make sure the I/O engine has data to read.
+ *
+ * Opens f->file_name for writing (creating it when td->o.allow_create is
+ * set) and writes f->real_file_size bytes produced by fill_io_buffer(), in
+ * chunks of at most td->o.max_bs[DDIR_WRITE] bytes.
+ *
+ * Returns 0 on success and 1 on failure. On success f->fd is left open and
+ * the caller must close it; on a failure after the open, f->fd is closed
+ * and reset to -1. When read_only is set, nothing is opened or written and
+ * 0 is returned.
+ */
+int generic_prepopulate_file(struct thread_data *td, struct fio_file *f)
+{
+       int flags;
+       unsigned long long left, bs;
+       char *b = NULL;
+
+       /* generic function for regular files only */
+       assert(f->filetype == FIO_TYPE_FILE);
+
+       if (read_only) {
+               log_err("fio: refusing to write a file due to read-only\n");
+               return 0;
+       }
+
+       flags = O_WRONLY;
+       if (td->o.allow_create)
+               flags |= O_CREAT;
+
+#ifdef WIN32
+       flags |= _O_BINARY;
+#endif
+
+       dprint(FD_FILE, "open file %s, flags %x\n", f->file_name, flags);
+       f->fd = open(f->file_name, flags, 0644);
+       if (f->fd < 0) {
+               /* save errno before any further library call can change it */
+               int err = errno;
+
+               if (err == ENOENT && !td->o.allow_create)
+                       log_err("fio: file creation disallowed by "
+                                       "allow_file_create=0\n");
+               else
+                       td_verror(td, err, "open");
+               return 1;
+       }
+
+       left = f->real_file_size;
+       bs = td->o.max_bs[DDIR_WRITE];
+       if (bs > left)
+               bs = left;
+
+       b = malloc(bs);
+       if (!b) {
+               td_verror(td, errno, "malloc");
+               goto err;
+       }
+
+       while (left && !td->terminate) {
+               ssize_t r;
+
+               /* shrink the final chunk so we never write past the size */
+               if (bs > left)
+                       bs = left;
+
+               fill_io_buffer(td, b, bs, bs);
+
+               r = write(f->fd, b, bs);
+
+               if (r > 0) {
+                       /* short writes are fine, just account what went out */
+                       left -= r;
+               } else {
+                       /*
+                        * write() only sets errno when it returns -1. A
+                        * zero-length write made no progress, so report it
+                        * as EIO instead of a stale errno value.
+                        */
+                       if (r < 0)
+                               td_verror(td, errno, "write");
+                       else
+                               td_verror(td, EIO, "write");
+                       goto err;
+               }
+       }
+
+       if (td->terminate) {
+               dprint(FD_FILE, "terminate unlink %s\n", f->file_name);
+               td_io_unlink_file(td, f);
+       } else if (td->o.create_fsync) {
+               if (fsync(f->fd) < 0) {
+                       td_verror(td, errno, "fsync");
+                       goto err;
+               }
+       }
+
+       free(b);
+       return 0;
+err:
+       close(f->fd);
+       f->fd = -1;
+       /* free(NULL) is a no-op, so no guard is needed */
+       free(b);
+       return 1;
+}
+
 unsigned long long get_rand_file_size(struct thread_data *td)
 {
        unsigned long long ret, sized;
        uint64_t frand_max;
-       unsigned long r;
+       uint64_t r;
 
        frand_max = rand_max(&td->file_size_state);
        r = __rand(&td->file_size_state);
@@ -433,8 +537,12 @@ static int get_file_size(struct thread_data *td, struct fio_file *f)
                ret = bdev_size(td, f);
        else if (f->filetype == FIO_TYPE_CHAR)
                ret = char_size(td, f);
-       else
-               f->real_file_size = -1ULL;
+       else {
+               f->real_file_size = -1;
+               log_info("%s: failed to get file size of %s\n", td->o.name,
+                                       f->file_name);
+               return 1; /* avoid offset extends end error message */
+       }
 
        /*
         * Leave ->real_file_size with 0 since it could be expectation
@@ -443,23 +551,11 @@ static int get_file_size(struct thread_data *td, struct fio_file *f)
        if (ret)
                return ret;
 
-       /*
-        * If ->real_file_size is -1, a conditional for the message
-        * "offset extends end" is always true, but it makes no sense,
-        * so just return the same value here.
-        */
-       if (f->real_file_size == -1ULL) {
-               log_info("%s: failed to get file size of %s\n", td->o.name,
-                                       f->file_name);
-               return 1;
-       }
-
-       if (td->o.start_offset && f->file_offset == 0)
-               dprint(FD_FILE, "offset of file %s not initialized yet\n",
-                                       f->file_name);
        /*
         * ->file_offset normally hasn't been initialized yet, so this
-        * is basically always false.
+        * is basically always false unless ->real_file_size is -1, but
+        * if ->real_file_size is -1 this message doesn't make sense.
+        * As a result, this message is basically useless.
         */
        if (f->file_offset > f->real_file_size) {
                log_err("%s: offset extends end (%llu > %llu)\n", td->o.name,
@@ -496,6 +592,9 @@ static int __file_invalidate_cache(struct thread_data *td, struct fio_file *f,
                ret = td->io_ops->invalidate(td, f);
                if (ret < 0)
                        errval = -ret;
+       } else if (td_ioengine_flagged(td, FIO_DISKLESSIO)) {
+               dprint(FD_IO, "invalidate not supported by ioengine %s\n",
+                      td->io_ops->name);
        } else if (f->filetype == FIO_TYPE_FILE) {
                dprint(FD_IO, "declare unneeded cache %s: %llu/%llu\n",
                        f->file_name, off, len);
@@ -518,19 +617,16 @@ static int __file_invalidate_cache(struct thread_data *td, struct fio_file *f,
                        ret = blockdev_invalidate_cache(f);
                }
                if (ret < 0 && errno == EACCES && geteuid()) {
-                       if (!root_warn) {
+                       if (!fio_did_warn(FIO_WARN_ROOT_FLUSH)) {
                                log_err("fio: only root may flush block "
                                        "devices. Cache flush bypassed!\n");
-                               root_warn = 1;
                        }
-                       ret = 0;
                }
                if (ret < 0)
                        errval = errno;
        } else if (f->filetype == FIO_TYPE_CHAR ||
                   f->filetype == FIO_TYPE_PIPE) {
                dprint(FD_IO, "invalidate not supported %s\n", f->file_name);
-               ret = 0;
        }
 
        /*
@@ -640,22 +736,11 @@ int generic_open_file(struct thread_data *td, struct fio_file *f)
                        f_out = stderr;
        }
 
-       if (td_trim(td))
-               goto skip_flags;
        if (td->o.odirect)
                flags |= OS_O_DIRECT;
-       if (td->o.oatomic) {
-               if (!FIO_O_ATOMIC) {
-                       td_verror(td, EINVAL, "OS does not support atomic IO");
-                       return 1;
-               }
-               flags |= OS_O_DIRECT | FIO_O_ATOMIC;
-       }
-       if (td->o.sync_io)
-               flags |= O_SYNC;
+       flags |= td->o.sync_io;
        if (td->o.create_on_open && td->o.allow_create)
                flags |= O_CREAT;
-skip_flags:
        if (f->filetype != FIO_TYPE_FILE)
                flags |= FIO_O_NOATIME;
 
@@ -664,6 +749,11 @@ open_again:
                if (!read_only)
                        flags |= O_RDWR;
 
+               if (td->o.verify_only) {
+                       flags &= ~O_RDWR;
+                       flags |= O_RDONLY;
+               }
+
                if (f->filetype == FIO_TYPE_FILE && td->o.allow_create)
                        flags |= O_CREAT;
 
@@ -672,7 +762,7 @@ open_again:
                else
                        from_hash = file_lookup_open(f, flags);
        } else if (td_read(td)) {
-               if (f->filetype == FIO_TYPE_CHAR && !read_only)
+               if (td_ioengine_flagged(td, FIO_RO_NEEDS_RW_OPEN) && !read_only)
                        flags |= O_RDWR;
                else
                        flags |= O_RDONLY;
@@ -683,7 +773,8 @@ open_again:
                        from_hash = file_lookup_open(f, flags);
        } else if (td_trim(td)) {
                assert(!td_rw(td)); /* should have matched above */
-               flags |= O_RDWR;
+               if (!read_only)
+                       flags |= O_RDWR;
                from_hash = file_lookup_open(f, flags);
        }
 
@@ -812,8 +903,7 @@ static unsigned long long get_fs_free_counts(struct thread_data *td)
                } else if (f->filetype != FIO_TYPE_FILE)
                        continue;
 
-               buf[255] = '\0';
-               strncpy(buf, f->file_name, 255);
+               snprintf(buf, FIO_ARRAY_SIZE(buf), "%s", f->file_name);
 
                if (stat(buf, &sb) < 0) {
                        if (errno != ENOENT)
@@ -836,7 +926,7 @@ static unsigned long long get_fs_free_counts(struct thread_data *td)
                        continue;
 
                fm = calloc(1, sizeof(*fm));
-               strncpy(fm->__base, buf, sizeof(fm->__base) - 1);
+               snprintf(fm->__base, FIO_ARRAY_SIZE(fm->__base), "%s", buf);
                fm->base = basename(fm->__base);
                fm->key = sb.st_dev;
                flist_add(&fm->list, &list);
@@ -860,45 +950,128 @@ static unsigned long long get_fs_free_counts(struct thread_data *td)
 
 uint64_t get_start_offset(struct thread_data *td, struct fio_file *f)
 {
+       bool align = false;
        struct thread_options *o = &td->o;
        unsigned long long align_bs;
        unsigned long long offset;
+       unsigned long long increment;
 
        if (o->file_append && f->filetype == FIO_TYPE_FILE)
                return f->real_file_size;
 
+       if (o->offset_increment_percent) {
+               assert(!o->offset_increment);
+               increment = o->offset_increment_percent * f->real_file_size / 100;
+               align = true;
+       } else
+               increment = o->offset_increment;
+
        if (o->start_offset_percent > 0) {
+               /* calculate the raw offset */
+               offset = (f->real_file_size * o->start_offset_percent / 100) +
+                       (td->subjob_number * increment);
+
+               align = true;
+       } else {
+               /* start_offset_percent not set */
+               offset = o->start_offset +
+                               td->subjob_number * increment;
+       }
+
+       if (align) {
                /*
-                * if blockalign is provided, find the min across read, write,
-                * and trim
+                * if offset_align is provided, use it
                 */
-               if (fio_option_is_set(o, ba)) {
-                       align_bs = (unsigned long long) min(o->ba[DDIR_READ], o->ba[DDIR_WRITE]);
-                       align_bs = min((unsigned long long) o->ba[DDIR_TRIM], align_bs);
+               if (fio_option_is_set(o, start_offset_align)) {
+                       align_bs = o->start_offset_align;
                } else {
                        /* else take the minimum block size */
                        align_bs = td_min_bs(td);
                }
 
-               /* calculate the raw offset */
-               offset = (f->real_file_size * o->start_offset_percent / 100) +
-                       (td->subjob_number * o->offset_increment);
-
                /*
                 * block align the offset at the next available boundary at
                 * ceiling(offset / align_bs) * align_bs
                 */
                offset = (offset / align_bs + (offset % align_bs != 0)) * align_bs;
+       }
 
-       } else {
-               /* start_offset_percent not set */
-               offset = o->start_offset +
-                               td->subjob_number * o->offset_increment;
+       return offset;
+}
+
+/*
+ * Find the longest leading part of path that exists on disk.
+ *
+ * Works on a private copy of path, repeatedly cutting it back to the last
+ * path separator and probing the result (stat(), or GetFileAttributesA()
+ * on WIN32). Returns the offset of the separator that ends the longest
+ * existing prefix, or 0 when the path holds no separator at all. A path
+ * longer than PATH_MAX - 1 is truncated in the private copy.
+ */
+int longest_existing_path(char *path) {
+       char buf[PATH_MAX];
+       bool done;
+       char *buf_pos;
+       int offset;
+#ifdef WIN32
+       DWORD dwAttr;
+#else
+       struct stat sb;
+#endif
+
+       /* bounded copy: an over-long path must not overflow buf */
+       snprintf(buf, sizeof(buf), "%s", path);
+       done = false;
+       while (!done) {
+               buf_pos = strrchr(buf, FIO_OS_PATH_SEPARATOR);
+               if (!buf_pos) {
+                       offset = 0;
+                       break;
+               }
+
+               /* keep the separator, drop everything after it */
+               *(buf_pos + 1) = '\0';
+
+#ifdef WIN32
+               dwAttr = GetFileAttributesA(buf);
+               if (dwAttr != INVALID_FILE_ATTRIBUTES) {
+                       done = true;
+               }
+#else
+               if (stat(buf, &sb) == 0)
+                       done = true;
+#endif
+               if (done)
+                       offset = buf_pos - buf;
+               else
+                       /* drop the separator too, so the next pass moves up */
+                       *buf_pos = '\0';
+       }
 
        return offset;
 }
 
+/*
+ * Create the directories named in fname that do not exist yet.
+ *
+ * Scanning starts at the offset returned by longest_existing_path(), and
+ * every path separator found from there on ends one directory component
+ * that is passed to fio_mkdir() with mode 0700. Whatever follows the last
+ * separator is not created. Sets TD_F_DIRS_CREATED in td->flags and
+ * returns true on success; returns false as soon as a directory cannot be
+ * created for a reason other than EEXIST.
+ */
+static bool create_work_dirs(struct thread_data *td, const char *fname)
+{
+       char path[PATH_MAX];
+       char *sep;
+
+       snprintf(path, PATH_MAX, "%s", fname);
+
+       sep = strchr(path + longest_existing_path(path),
+                    FIO_OS_PATH_SEPARATOR);
+       for (; sep != NULL; sep = strchr(sep + 1, FIO_OS_PATH_SEPARATOR)) {
+               /* a separator at the very start has no component before it */
+               if (sep == path)
+                       continue;
+
+               /* cut the string here so path names just this directory */
+               *sep = '\0';
+               errno = 0;
+               if (fio_mkdir(path, 0700) && errno != EEXIST) {
+                       log_err("fio: failed to create dir (%s): %s\n",
+                               path, strerror(errno));
+                       return false;
+               }
+               /* put the separator back before looking for the next one */
+               *sep = FIO_OS_PATH_SEPARATOR;
+       }
+
+       td->flags |= TD_F_DIRS_CREATED;
+       return true;
+}
+
 /*
  * Open the files and setup files sizes, creating files if necessary.
  */
@@ -910,15 +1083,20 @@ int setup_files(struct thread_data *td)
        unsigned int i, nr_fs_extra = 0;
        int err = 0, need_extend;
        int old_state;
-       const unsigned int bs = td_min_bs(td);
+       const unsigned long long bs = td_min_bs(td);
        uint64_t fs = 0;
 
        dprint(FD_FILE, "setup files\n");
 
        old_state = td_bump_runstate(td, TD_SETTING_UP);
 
-       if (o->read_iolog_file)
-               goto done;
+       for_each_file(td, f, i) {
+               if (!td_ioengine_flagged(td, FIO_DISKLESSIO) &&
+                   strchr(f->file_name, FIO_OS_PATH_SEPARATOR) &&
+                   !(td->flags & TD_F_DIRS_CREATED) &&
+                   !create_work_dirs(td, f->file_name))
+                       goto err_out;
+       }
 
        /*
         * Find out physical size of files or devices for this thread,
@@ -935,6 +1113,16 @@ int setup_files(struct thread_data *td)
        if (err)
                goto err_out;
 
+       if (td->o.zone_mode == ZONE_MODE_ZBD) {
+               err = zbd_init_files(td);
+               if (err)
+                       goto err_out;
+       }
+       zbd_recalc_options_with_zone_granularity(td);
+
+       if (o->read_iolog_file)
+               goto done;
+
        /*
         * check sizes. if the files/devices do not exist and the size
         * isn't passed to fio, abort.
@@ -1043,7 +1231,9 @@ int setup_files(struct thread_data *td)
                if (f->io_size == -1ULL)
                        total_size = -1ULL;
                else {
-                        if (o->size_percent) {
+                       uint64_t io_size;
+
+                        if (o->size_percent && o->size_percent != 100) {
                                uint64_t file_size;
 
                                file_size = f->io_size + f->file_offset;
@@ -1054,17 +1244,26 @@ int setup_files(struct thread_data *td)
 
                                f->io_size -= (f->io_size % td_min_bs(td));
                        }
-                       total_size += f->io_size;
+
+                       io_size = f->io_size;
+                       if (o->io_size_percent && o->io_size_percent != 100) {
+                               io_size *= o->io_size_percent;
+                               io_size /= 100;
+                       }
+
+                       total_size += io_size;
                }
 
                if (f->filetype == FIO_TYPE_FILE &&
-                   (f->io_size + f->file_offset) > f->real_file_size &&
-                   !td_ioengine_flagged(td, FIO_DISKLESSIO)) {
-                       if (!o->create_on_open) {
+                   (f->io_size + f->file_offset) > f->real_file_size) {
+                       if (!td_ioengine_flagged(td, FIO_DISKLESSIO) &&
+                           !o->create_on_open) {
                                need_extend++;
                                extend_size += (f->io_size + f->file_offset);
                                fio_file_set_extend(f);
-                       } else
+                       } else if (!td_ioengine_flagged(td, FIO_DISKLESSIO) ||
+                                  (td_ioengine_flagged(td, FIO_DISKLESSIO) &&
+                                   td_ioengine_flagged(td, FIO_FAKEIO)))
                                f->real_file_size = f->io_size + f->file_offset;
                }
        }
@@ -1094,7 +1293,7 @@ int setup_files(struct thread_data *td)
                o->size = total_size;
 
        if (o->size < td_min_bs(td)) {
-               log_err("fio: blocksize too large for data set\n");
+               log_err("fio: blocksize is larger than data set range\n");
                goto err_out;
        }
 
@@ -1152,8 +1351,42 @@ int setup_files(struct thread_data *td)
        if (err)
                goto err_out;
 
-       if (!o->zone_size)
-               o->zone_size = o->size;
+       /*
+        * Prepopulate files with data. It might be expected to read some
+        * "real" data instead of zero'ed files (if no writes to file occurred
+        * prior to a read job). Engine has to provide a way to do that.
+        */
+       if (td->io_ops->prepopulate_file) {
+               temp_stall_ts = 1;
+
+               for_each_file(td, f, i) {
+                       if (output_format & FIO_OUTPUT_NORMAL) {
+                               log_info("%s: Prepopulating IO file (%s)\n",
+                                                       o->name, f->file_name);
+                       }
+
+                       err = td->io_ops->prepopulate_file(td, f);
+                       if (err)
+                               break;
+
+                       err = __file_invalidate_cache(td, f, f->file_offset,
+                                                               f->io_size);
+
+                       /*
+                        * Shut up static checker
+                        */
+                       if (f->fd != -1)
+                               close(f->fd);
+
+                       f->fd = -1;
+                       if (err)
+                               break;
+               }
+               temp_stall_ts = 0;
+       }
+
+       if (err)
+               goto err_out;
 
        /*
         * iolog already set the total io size, if we read back
@@ -1167,11 +1400,25 @@ int setup_files(struct thread_data *td)
        }
 
 done:
+       if (td->o.zone_mode == ZONE_MODE_ZBD) {
+               err = zbd_setup_files(td);
+               if (err)
+                       goto err_out;
+       }
+
        if (o->create_only)
                td->done = 1;
 
        td_restore_runstate(td, old_state);
+
+       if (td->o.fdp) {
+               err = fdp_init(td);
+               if (err)
+                       goto err_out;
+       }
+
        return 0;
+
 err_offset:
        log_err("%s: you need to specify valid offset=\n", o->name);
 err_out:
@@ -1179,7 +1426,7 @@ err_out:
        return 1;
 }
 
-int pre_read_files(struct thread_data *td)
+bool pre_read_files(struct thread_data *td)
 {
        struct fio_file *f;
        unsigned int i;
@@ -1187,46 +1434,45 @@ int pre_read_files(struct thread_data *td)
        dprint(FD_FILE, "pre_read files\n");
 
        for_each_file(td, f, i) {
-               if (pre_read_file(td, f))
-                       return -1;
+               if (!pre_read_file(td, f))
+                       return false;
        }
 
-       return 0;
+       return true;
 }
 
-static int __init_rand_distribution(struct thread_data *td, struct fio_file *f)
+static void __init_rand_distribution(struct thread_data *td, struct fio_file *f)
 {
        unsigned int range_size, seed;
-       unsigned long nranges;
+       uint64_t nranges;
        uint64_t fsize;
 
        range_size = min(td->o.min_bs[DDIR_READ], td->o.min_bs[DDIR_WRITE]);
        fsize = min(f->real_file_size, f->io_size);
 
-       nranges = (fsize + range_size - 1) / range_size;
+       nranges = (fsize + range_size - 1ULL) / range_size;
 
-       seed = jhash(f->file_name, strlen(f->file_name), 0) * td->thread_number;
-       if (!td->o.rand_repeatable)
-               seed = td->rand_seeds[4];
+       seed = jhash(f->file_name, strlen(f->file_name), 0) * td->thread_number *
+               td->rand_seeds[FIO_RAND_BLOCK_OFF];
 
        if (td->o.random_distribution == FIO_RAND_DIST_ZIPF)
-               zipf_init(&f->zipf, nranges, td->o.zipf_theta.u.f, seed);
+               zipf_init(&f->zipf, nranges, td->o.zipf_theta.u.f, td->o.random_center.u.f, seed);
        else if (td->o.random_distribution == FIO_RAND_DIST_PARETO)
-               pareto_init(&f->zipf, nranges, td->o.pareto_h.u.f, seed);
+               pareto_init(&f->zipf, nranges, td->o.pareto_h.u.f, td->o.random_center.u.f, seed);
        else if (td->o.random_distribution == FIO_RAND_DIST_GAUSS)
-               gauss_init(&f->gauss, nranges, td->o.gauss_dev.u.f, seed);
-
-       return 1;
+               gauss_init(&f->gauss, nranges, td->o.gauss_dev.u.f, td->o.random_center.u.f, seed);
 }
 
-static int init_rand_distribution(struct thread_data *td)
+static bool init_rand_distribution(struct thread_data *td)
 {
        struct fio_file *f;
        unsigned int i;
        int state;
 
-       if (td->o.random_distribution == FIO_RAND_DIST_RANDOM)
-               return 0;
+       if (td->o.random_distribution == FIO_RAND_DIST_RANDOM ||
+           td->o.random_distribution == FIO_RAND_DIST_ZONED ||
+           td->o.random_distribution == FIO_RAND_DIST_ZONED_ABS)
+               return false;
 
        state = td_bump_runstate(td, TD_SETTING_UP);
 
@@ -1234,13 +1480,12 @@ static int init_rand_distribution(struct thread_data *td)
                __init_rand_distribution(td, f);
 
        td_restore_runstate(td, state);
-
-       return 1;
+       return true;
 }
 
 /*
  * Check if the number of blocks exceeds the randomness capability of
- * the selected generator. Tausworthe is 32-bit, the others are fullly
+ * the selected generator. Tausworthe is 32-bit, the others are fully
  * 64-bit capable.
  */
 static int check_rand_gen_limits(struct thread_data *td, struct fio_file *f,
@@ -1275,33 +1520,39 @@ static int check_rand_gen_limits(struct thread_data *td, struct fio_file *f,
        return 0;
 }
 
-int init_random_map(struct thread_data *td)
+bool init_random_map(struct thread_data *td)
 {
        unsigned long long blocks;
        struct fio_file *f;
        unsigned int i;
 
        if (init_rand_distribution(td))
-               return 0;
+               return true;
        if (!td_random(td))
-               return 0;
+               return true;
 
        for_each_file(td, f, i) {
                uint64_t fsize = min(f->real_file_size, f->io_size);
 
+               if (td->o.zone_mode == ZONE_MODE_STRIDED)
+                       fsize = td->o.zone_range;
+
                blocks = fsize / (unsigned long long) td->o.rw_min_bs;
 
                if (check_rand_gen_limits(td, f, blocks))
-                       return 1;
+                       return false;
 
                if (td->o.random_generator == FIO_RAND_GEN_LFSR) {
-                       unsigned long seed;
+                       uint64_t seed;
 
                        seed = td->rand_seeds[FIO_RAND_BLOCK_OFF];
 
                        if (!lfsr_init(&f->lfsr, blocks, seed, 0)) {
                                fio_file_set_lfsr(f);
                                continue;
+                       } else {
+                               log_err("fio: failed initializing LFSR\n");
+                               return false;
                        }
                } else if (!td->o.norandommap) {
                        f->io_axmap = axmap_new(blocks);
@@ -1317,14 +1568,14 @@ int init_random_map(struct thread_data *td)
                                " a large number of jobs, try the 'norandommap'"
                                " option or set 'softrandommap'. Or give"
                                " a larger --alloc-size to fio.\n");
-                       return 1;
+                       return false;
                }
 
                log_info("fio: file %s failed allocating random map. Running "
                         "job without.\n", f->file_name);
        }
 
-       return 0;
+       return true;
 }
 
 void close_files(struct thread_data *td)
@@ -1338,6 +1589,21 @@ void close_files(struct thread_data *td)
        }
 }
 
+/*
+ * Release a file structure and what hangs off it: the io_axmap when the
+ * file is flagged as having one, the ruhs_info buffer when set, the file
+ * name, and finally the structure itself. The name and the structure go
+ * back through sfree() when fio_file_smalloc() reports true for the file,
+ * and through free() otherwise.
+ */
+void fio_file_free(struct fio_file *f)
+{
+       if (fio_file_axmap(f))
+               axmap_free(f->io_axmap);
+       if (f->ruhs_info)
+               sfree(f->ruhs_info);
+
+       if (fio_file_smalloc(f)) {
+               sfree(f->file_name);
+               sfree(f);
+               return;
+       }
+
+       free(f->file_name);
+       free(f);
+}
+
 void close_and_free_files(struct thread_data *td)
 {
        struct fio_file *f;
@@ -1361,13 +1627,9 @@ void close_and_free_files(struct thread_data *td)
                        td_io_unlink_file(td, f);
                }
 
-               sfree(f->file_name);
-               f->file_name = NULL;
-               if (fio_file_axmap(f)) {
-                       axmap_free(f->io_axmap);
-                       f->io_axmap = NULL;
-               }
-               sfree(f);
+               zbd_close_file(f);
+               fdp_free_ruhs_info(f);
+               fio_file_free(f);
        }
 
        td->o.filename = NULL;
@@ -1481,7 +1743,10 @@ static struct fio_file *alloc_new_file(struct thread_data *td)
 {
        struct fio_file *f;
 
-       f = smalloc(sizeof(*f));
+       if (td_ioengine_flagged(td, FIO_NOFILEHASH))
+               f = calloc(1, sizeof(*f));
+       else
+               f = scalloc(1, sizeof(*f));
        if (!f) {
                assert(0);
                return NULL;
@@ -1490,6 +1755,8 @@ static struct fio_file *alloc_new_file(struct thread_data *td)
        f->fd = -1;
        f->shadow_fd = -1;
        fio_file_reset(td, f);
+       if (!td_ioengine_flagged(td, FIO_NOFILEHASH))
+               fio_file_set_smalloc(f);
        return f;
 }
 
@@ -1564,9 +1831,13 @@ int add_file(struct thread_data *td, const char *fname, int numjob, int inc)
        if (td->io_ops && td_ioengine_flagged(td, FIO_DISKLESSIO))
                f->real_file_size = -1ULL;
 
-       f->file_name = smalloc_strdup(file_name);
-       if (!f->file_name)
-               assert(0);
+       if (td_ioengine_flagged(td, FIO_NOFILEHASH))
+               f->file_name = strdup(file_name);
+       else
+               f->file_name = smalloc_strdup(file_name);
+
+       /* can't handle smalloc failure from here */
+       assert(f->file_name);
 
        get_file_type(f);
 
@@ -1577,7 +1848,7 @@ int add_file(struct thread_data *td, const char *fname, int numjob, int inc)
                f->rwlock = fio_rwlock_init();
                break;
        case FILE_LOCK_EXCLUSIVE:
-               f->lock = fio_mutex_init(FIO_MUTEX_UNLOCKED);
+               f->lock = fio_sem_init(FIO_SEM_UNLOCKED);
                break;
        default:
                log_err("fio: unknown lock mode: %d\n", td->o.file_lock_mode);
@@ -1585,10 +1856,9 @@ int add_file(struct thread_data *td, const char *fname, int numjob, int inc)
        }
 
        td->files_index++;
-       if (f->filetype == FIO_TYPE_FILE)
-               td->nr_normal_files++;
 
-       set_already_allocated(file_name);
+       if (td->o.numjobs > 1)
+               set_already_allocated(file_name);
 
        if (inc)
                td->o.nr_files++;
@@ -1634,6 +1904,11 @@ int put_file(struct thread_data *td, struct fio_file *f)
        if (--f->references)
                return 0;
 
+       disk_util_dec(f->du);
+
+       if (td->o.file_lock_mode != FILE_LOCK_NONE)
+               unlock_file_all(td, f);
+
        if (should_fsync(td) && td->o.fsync_on_close) {
                f_ret = fsync(f->fd);
                if (f_ret < 0)
@@ -1647,6 +1922,7 @@ int put_file(struct thread_data *td, struct fio_file *f)
                ret = f_ret;
 
        td->nr_open_files--;
+       fio_file_clear_closing(f);
        fio_file_clear_open(f);
        assert(f->fd == -1);
        return ret;
@@ -1663,7 +1939,7 @@ void lock_file(struct thread_data *td, struct fio_file *f, enum fio_ddir ddir)
                else
                        fio_rwlock_write(f->rwlock);
        } else if (td->o.file_lock_mode == FILE_LOCK_EXCLUSIVE)
-               fio_mutex_down(f->lock);
+               fio_sem_down(f->lock);
 
        td->file_locks[f->fileno] = td->o.file_lock_mode;
 }
@@ -1676,7 +1952,7 @@ void unlock_file(struct thread_data *td, struct fio_file *f)
        if (td->o.file_lock_mode == FILE_LOCK_READWRITE)
                fio_rwlock_unlock(f->rwlock);
        else if (td->o.file_lock_mode == FILE_LOCK_EXCLUSIVE)
-               fio_mutex_up(f->lock);
+               fio_sem_up(f->lock);
 
        td->file_locks[f->fileno] = FILE_LOCK_NONE;
 }
@@ -1689,10 +1965,10 @@ void unlock_file_all(struct thread_data *td, struct fio_file *f)
                unlock_file(td, f);
 }
 
-static int recurse_dir(struct thread_data *td, const char *dirname)
+static bool recurse_dir(struct thread_data *td, const char *dirname)
 {
        struct dirent *dir;
-       int ret = 0;
+       bool ret = false;
        DIR *D;
 
        D = opendir(dirname);
@@ -1701,7 +1977,7 @@ static int recurse_dir(struct thread_data *td, const char *dirname)
 
                snprintf(buf, FIO_VERROR_SIZE, "opendir(%s)", dirname);
                td_verror(td, errno, buf);
-               return 1;
+               return true;
        }
 
        while ((dir = readdir(D)) != NULL) {
@@ -1711,12 +1987,12 @@ static int recurse_dir(struct thread_data *td, const char *dirname)
                if (!strcmp(dir->d_name, ".") || !strcmp(dir->d_name, ".."))
                        continue;
 
-               sprintf(full_path, "%s%s%s", dirname, FIO_OS_PATH_SEPARATOR, dir->d_name);
+               sprintf(full_path, "%s%c%s", dirname, FIO_OS_PATH_SEPARATOR, dir->d_name);
 
                if (lstat(full_path, &sb) == -1) {
                        if (errno != ENOENT) {
                                td_verror(td, errno, "stat");
-                               ret = 1;
+                               ret = true;
                                break;
                        }
                }
@@ -1757,21 +2033,25 @@ void dup_files(struct thread_data *td, struct thread_data *org)
        if (!org->files)
                return;
 
-       td->files = malloc(org->files_index * sizeof(f));
+       td->files = calloc(org->files_index, sizeof(f));
 
        if (td->o.file_lock_mode != FILE_LOCK_NONE)
                td->file_locks = malloc(org->files_index);
 
+       assert(org->files_index >= org->o.nr_files);
        for_each_file(org, f, i) {
                struct fio_file *__f;
 
                __f = alloc_new_file(td);
 
                if (f->file_name) {
-                       __f->file_name = smalloc_strdup(f->file_name);
-                       if (!__f->file_name)
-                               assert(0);
+                       if (td_ioengine_flagged(td, FIO_NOFILEHASH))
+                               __f->file_name = strdup(f->file_name);
+                       else
+                               __f->file_name = smalloc_strdup(f->file_name);
 
+                       /* can't handle smalloc failure from here */
+                       assert(__f->file_name);
                        __f->filetype = f->filetype;
                }
 
@@ -1808,7 +2088,6 @@ void free_release_files(struct thread_data *td)
        td->o.nr_files = 0;
        td->o.open_files = 0;
        td->files_index = 0;
-       td->nr_normal_files = 0;
 }
 
 void fio_file_reset(struct thread_data *td, struct fio_file *f)
@@ -1824,6 +2103,8 @@ void fio_file_reset(struct thread_data *td, struct fio_file *f)
                axmap_reset(f->io_axmap);
        else if (fio_file_lfsr(f))
                lfsr_reset(&f->lfsr, td->rand_seeds[FIO_RAND_BLOCK_OFF]);
+
+       zbd_file_reset(td, f);
 }
 
 bool fio_files_done(struct thread_data *td)
@@ -1868,6 +2149,7 @@ int fio_set_directio(struct thread_data *td, struct fio_file *f)
 
        return 0;
 #else
+       log_err("fio: direct IO is not supported on this host operating system\n");
        return -1;
 #endif
 }