t/nvmept_trim: increase transfer size for some tests
[fio.git] / verify.c
index 58f37aefb65f2777f7fc344f46aae3c45f6c5eec..b2fede24710fd3fcedfbcda890b3b9def9a957be 100644 (file)
--- a/verify.c
+++ b/verify.c
@@ -8,12 +8,14 @@
 #include <pthread.h>
 #include <libgen.h>
 
+#include "arch/arch.h"
 #include "fio.h"
 #include "verify.h"
 #include "trim.h"
 #include "lib/rand.h"
 #include "lib/hweight.h"
 #include "lib/pattern.h"
+#include "oslib/asprintf.h"
 
 #include "crc/md5.h"
 #include "crc/crc64.h"
 #include "crc/sha512.h"
 #include "crc/sha1.h"
 #include "crc/xxhash.h"
+#include "crc/sha3.h"
 
 static void populate_hdr(struct thread_data *td, struct io_u *io_u,
                         struct verify_header *hdr, unsigned int header_num,
                         unsigned int header_len);
-static void fill_hdr(struct thread_data *td, struct io_u *io_u,
-                    struct verify_header *hdr, unsigned int header_num,
-                    unsigned int header_len, uint64_t rand_seed);
 static void __fill_hdr(struct thread_data *td, struct io_u *io_u,
                       struct verify_header *hdr, unsigned int header_num,
                       unsigned int header_len, uint64_t rand_seed);
@@ -41,32 +41,27 @@ void fill_buffer_pattern(struct thread_data *td, void *p, unsigned int len)
        (void)cpy_pattern(td->o.buffer_pattern, td->o.buffer_pattern_bytes, p, len);
 }
 
-void __fill_buffer(struct thread_options *o, unsigned long seed, void *p,
-                  unsigned int len)
+static void __fill_buffer(struct thread_options *o, uint64_t seed, void *p,
+                         unsigned int len)
 {
        __fill_random_buf_percentage(seed, p, o->compress_percentage, len, len, o->buffer_pattern, o->buffer_pattern_bytes);
 }
 
-unsigned long fill_buffer(struct thread_data *td, void *p, unsigned int len)
-{
-       struct frand_state *fs = &td->verify_state;
-       struct thread_options *o = &td->o;
-
-       return fill_random_buf_percentage(fs, p, o->compress_percentage, len, len, o->buffer_pattern, o->buffer_pattern_bytes);
-}
-
 void fill_verify_pattern(struct thread_data *td, void *p, unsigned int len,
-                        struct io_u *io_u, unsigned long seed, int use_seed)
+                        struct io_u *io_u, uint64_t seed, int use_seed)
 {
        struct thread_options *o = &td->o;
 
        if (!o->verify_pattern_bytes) {
                dprint(FD_VERIFY, "fill random bytes len=%u\n", len);
 
-               if (use_seed)
-                       __fill_buffer(o, seed, p, len);
-               else
-                       io_u->rand_seed = fill_buffer(td, p, len);
+               if (!use_seed) {
+                       seed = __rand(&td->verify_state);
+                       if (sizeof(int) != sizeof(long *))
+                               seed *= (unsigned long)__rand(&td->verify_state);
+               }
+               io_u->rand_seed = seed;
+               __fill_buffer(o, seed, p, len);
                return;
        }
 
@@ -88,15 +83,20 @@ static unsigned int get_hdr_inc(struct thread_data *td, struct io_u *io_u)
 {
        unsigned int hdr_inc;
 
+       /*
+        * If we use bs_unaligned, buflen can be larger than the verify
+        * interval (which just defaults to the smallest blocksize possible).
+        */
        hdr_inc = io_u->buflen;
-       if (td->o.verify_interval && td->o.verify_interval <= io_u->buflen)
+       if (td->o.verify_interval && td->o.verify_interval <= io_u->buflen &&
+           !td->o.bs_unaligned)
                hdr_inc = td->o.verify_interval;
 
        return hdr_inc;
 }
 
 static void fill_pattern_headers(struct thread_data *td, struct io_u *io_u,
-                                unsigned long seed, int use_seed)
+                                uint64_t seed, int use_seed)
 {
        unsigned int hdr_inc, header_num;
        struct verify_header *hdr;
@@ -171,6 +171,18 @@ static inline unsigned int __hdr_size(int verify_type)
        case VERIFY_SHA512:
                len = sizeof(struct vhdr_sha512);
                break;
+       case VERIFY_SHA3_224:
+               len = sizeof(struct vhdr_sha3_224);
+               break;
+       case VERIFY_SHA3_256:
+               len = sizeof(struct vhdr_sha3_256);
+               break;
+       case VERIFY_SHA3_384:
+               len = sizeof(struct vhdr_sha3_384);
+               break;
+       case VERIFY_SHA3_512:
+               len = sizeof(struct vhdr_sha3_512);
+               break;
        case VERIFY_XXHASH:
                len = sizeof(struct vhdr_xxhash);
                break;
@@ -225,39 +237,27 @@ struct vcont {
 };
 
 #define DUMP_BUF_SZ    255
-static int dump_buf_warned;
 
 static void dump_buf(char *buf, unsigned int len, unsigned long long offset,
                     const char *type, struct fio_file *f)
 {
-       char *ptr, fname[DUMP_BUF_SZ];
-       size_t buf_left = DUMP_BUF_SZ;
+       char *ptr, *fname;
+       char sep[2] = { FIO_OS_PATH_SEPARATOR, 0 };
        int ret, fd;
 
        ptr = strdup(f->file_name);
 
-       memset(fname, 0, sizeof(fname));
-       if (aux_path)
-               sprintf(fname, "%s%s", aux_path, FIO_OS_PATH_SEPARATOR);
-
-       strncpy(fname + strlen(fname), basename(ptr), buf_left - 1);
-
-       buf_left -= strlen(fname);
-       if (buf_left <= 0) {
-               if (!dump_buf_warned) {
-                       log_err("fio: verify failure dump buffer too small\n");
-                       dump_buf_warned = 1;
-               }
-               free(ptr);
-               return;
+       if (asprintf(&fname, "%s%s%s.%llu.%s", aux_path ? : "",
+                    aux_path ? sep : "", basename(ptr), offset, type) < 0) {
+               if (!fio_did_warn(FIO_WARN_VERIFY_BUF))
+                       log_err("fio: not enough memory for dump buffer filename\n");
+               goto free_ptr;
        }
 
-       snprintf(fname + strlen(fname), buf_left, ".%llu.%s", offset, type);
-
        fd = open(fname, O_CREAT | O_TRUNC | O_WRONLY, 0644);
        if (fd < 0) {
                perror("open verify buf file");
-               return;
+               goto free_fname;
        }
 
        while (len) {
@@ -274,6 +274,11 @@ static void dump_buf(char *buf, unsigned int len, unsigned long long offset,
 
        close(fd);
        log_err("       %s data dumped as %s\n", type, fname);
+
+free_fname:
+       free(fname);
+
+free_ptr:
        free(ptr);
 }
 
@@ -297,7 +302,7 @@ static void __dump_verify_buffers(struct verify_header *hdr, struct vcont *vc)
         */
        hdr_offset = vc->hdr_num * hdr->len;
 
-       dump_buf(io_u->buf + hdr_offset, hdr->len, io_u->offset + hdr_offset,
+       dump_buf(io_u->buf + hdr_offset, hdr->len, io_u->verify_offset + hdr_offset,
                        "received", vc->io_u->file);
 
        /*
@@ -312,7 +317,7 @@ static void __dump_verify_buffers(struct verify_header *hdr, struct vcont *vc)
 
        fill_pattern_headers(td, &dummy, hdr->rand_seed, 1);
 
-       dump_buf(buf + hdr_offset, hdr->len, io_u->offset + hdr_offset,
+       dump_buf(buf + hdr_offset, hdr->len, io_u->verify_offset + hdr_offset,
                        "expected", vc->io_u->file);
        free(buf);
 }
@@ -333,11 +338,21 @@ static void dump_verify_buffers(struct verify_header *hdr, struct vcont *vc)
 static void log_verify_failure(struct verify_header *hdr, struct vcont *vc)
 {
        unsigned long long offset;
+       uint32_t len;
+       struct thread_data *td = vc->td;
 
-       offset = vc->io_u->offset;
-       offset += vc->hdr_num * hdr->len;
-       log_err("%.8s: verify failed at file %s offset %llu, length %u\n",
-                       vc->name, vc->io_u->file->file_name, offset, hdr->len);
+       offset = vc->io_u->verify_offset;
+       if (td->o.verify != VERIFY_PATTERN_NO_HDR) {
+               len = hdr->len;
+               offset += (unsigned long long) vc->hdr_num * len;
+       } else {
+               len = vc->io_u->buflen;
+       }
+
+       log_err("%.8s: verify failed at file %s offset %llu, length %u"
+                       " (requested block: offset=%llu, length=%llu, flags=%x)\n",
+                       vc->name, vc->io_u->file->file_name, offset, len,
+                       vc->io_u->verify_offset, vc->io_u->buflen, vc->io_u->flags);
 
        if (vc->good_crc && vc->bad_crc) {
                log_err("       Expected CRC: ");
@@ -373,7 +388,7 @@ static int verify_io_u_pattern(struct verify_header *hdr, struct vcont *vc)
        (void)paste_format_inplace(pattern, pattern_size,
                                   td->o.verify_fmt, td->o.verify_fmt_sz, io_u);
 
-       buf = (void *) hdr + header_size;
+       buf = (char *) hdr + header_size;
        len = get_hdr_inc(td, io_u) - header_size;
        mod = (get_hdr_inc(td, io_u) * vc->hdr_num + header_size) % pattern_size;
 
@@ -391,8 +406,10 @@ static int verify_io_u_pattern(struct verify_header *hdr, struct vcont *vc)
                                (unsigned char)buf[i],
                                (unsigned char)pattern[mod],
                                bits);
-                       log_err("fio: bad pattern block offset %u\n", i);
-                       dump_verify_buffers(hdr, vc);
+                       log_err("fio: bad pattern block offset %u\n",
+                               i + header_size);
+                       vc->name = "pattern";
+                       log_verify_failure(hdr, vc);
                        return EILSEQ;
                }
                mod++;
@@ -429,6 +446,84 @@ static int verify_io_u_xxhash(struct verify_header *hdr, struct vcont *vc)
        return EILSEQ;
 }
 
+static int verify_io_u_sha3(struct verify_header *hdr, struct vcont *vc,
+                           struct fio_sha3_ctx *sha3_ctx, uint8_t *sha,
+                           unsigned int sha_size, const char *name)
+{
+       void *p = io_u_verify_off(hdr, vc);
+
+       dprint(FD_VERIFY, "%s verify io_u %p, len %u\n", name, vc->io_u, hdr->len);
+
+       fio_sha3_update(sha3_ctx, p, hdr->len - hdr_size(vc->td, hdr));
+       fio_sha3_final(sha3_ctx);
+
+       if (!memcmp(sha, sha3_ctx->sha, sha_size))
+               return 0;
+
+       vc->name = name;
+       vc->good_crc = sha;
+       vc->bad_crc = sha3_ctx->sha;
+       vc->crc_len = sha_size;
+       log_verify_failure(hdr, vc);
+       return EILSEQ;
+}
+
+static int verify_io_u_sha3_224(struct verify_header *hdr, struct vcont *vc)
+{
+       struct vhdr_sha3_224 *vh = hdr_priv(hdr);
+       uint8_t sha[SHA3_224_DIGEST_SIZE];
+       struct fio_sha3_ctx sha3_ctx = {
+               .sha = sha,
+       };
+
+       fio_sha3_224_init(&sha3_ctx);
+
+       return verify_io_u_sha3(hdr, vc, &sha3_ctx, vh->sha,
+                               SHA3_224_DIGEST_SIZE, "sha3-224");
+}
+
+static int verify_io_u_sha3_256(struct verify_header *hdr, struct vcont *vc)
+{
+       struct vhdr_sha3_256 *vh = hdr_priv(hdr);
+       uint8_t sha[SHA3_256_DIGEST_SIZE];
+       struct fio_sha3_ctx sha3_ctx = {
+               .sha = sha,
+       };
+
+       fio_sha3_256_init(&sha3_ctx);
+
+       return verify_io_u_sha3(hdr, vc, &sha3_ctx, vh->sha,
+                               SHA3_256_DIGEST_SIZE, "sha3-256");
+}
+
+static int verify_io_u_sha3_384(struct verify_header *hdr, struct vcont *vc)
+{
+       struct vhdr_sha3_384 *vh = hdr_priv(hdr);
+       uint8_t sha[SHA3_384_DIGEST_SIZE];
+       struct fio_sha3_ctx sha3_ctx = {
+               .sha = sha,
+       };
+
+       fio_sha3_384_init(&sha3_ctx);
+
+       return verify_io_u_sha3(hdr, vc, &sha3_ctx, vh->sha,
+                               SHA3_384_DIGEST_SIZE, "sha3-384");
+}
+
+static int verify_io_u_sha3_512(struct verify_header *hdr, struct vcont *vc)
+{
+       struct vhdr_sha3_512 *vh = hdr_priv(hdr);
+       uint8_t sha[SHA3_512_DIGEST_SIZE];
+       struct fio_sha3_ctx sha3_ctx = {
+               .sha = sha,
+       };
+
+       fio_sha3_512_init(&sha3_ctx);
+
+       return verify_io_u_sha3(hdr, vc, &sha3_ctx, vh->sha,
+                               SHA3_512_DIGEST_SIZE, "sha3-512");
+}
+
 static int verify_io_u_sha512(struct verify_header *hdr, struct vcont *vc)
 {
        void *p = io_u_verify_off(hdr, vc);
@@ -651,13 +746,13 @@ int verify_io_u_async(struct thread_data *td, struct io_u **io_u_ptr)
 
        if (io_u->flags & IO_U_F_IN_CUR_DEPTH) {
                td->cur_depth--;
-               io_u_clear(io_u, IO_U_F_IN_CUR_DEPTH);
+               io_u_clear(td, io_u, IO_U_F_IN_CUR_DEPTH);
        }
        flist_add_tail(&io_u->verify_list, &td->verify_list);
        *io_u_ptr = NULL;
-       pthread_mutex_unlock(&td->io_u_lock);
 
        pthread_cond_signal(&td->verify_cond);
+       pthread_mutex_unlock(&td->io_u_lock);
        return 0;
 }
 
@@ -713,9 +808,9 @@ static int verify_trimmed_io_u(struct thread_data *td, struct io_u *io_u)
 
        mem_is_zero_slow(io_u->buf, io_u->buflen, &offset);
 
-       log_err("trim: verify failed at file %s offset %llu, length %lu"
+       log_err("trim: verify failed at file %s offset %llu, length %llu"
                ", block offset %lu\n",
-                       io_u->file->file_name, io_u->offset, io_u->buflen,
+                       io_u->file->file_name, io_u->verify_offset, io_u->buflen,
                        (unsigned long) offset);
        return EILSEQ;
 }
@@ -743,10 +838,10 @@ static int verify_header(struct io_u *io_u, struct thread_data *td,
                        hdr->rand_seed, io_u->rand_seed);
                goto err;
        }
-       if (hdr->offset != io_u->offset + hdr_num * td->o.verify_interval) {
+       if (hdr->offset != io_u->verify_offset + hdr_num * td->o.verify_interval) {
                log_err("verify: bad header offset %"PRIu64
                        ", wanted %llu",
-                       hdr->offset, io_u->offset);
+                       hdr->offset, io_u->verify_offset);
                goto err;
        }
 
@@ -754,13 +849,11 @@ static int verify_header(struct io_u *io_u, struct thread_data *td,
         * For read-only workloads, the program cannot be certain of the
         * last numberio written to a block. Checking of numberio will be
         * done only for workloads that write data.  For verify_only,
-        * numberio will be checked in the last iteration when the correct
-        * state of numberio, that would have been written to each block
-        * in a previous run of fio, has been reached.
+        * the numberio check is skipped.
         */
-       if ((td_write(td) || td_rw(td)) && (td_min_bs(td) == td_max_bs(td)) &&
+       if (td_write(td) && (td_min_bs(td) == td_max_bs(td)) &&
            !td->o.time_based)
-               if (!td->o.verify_only || td->o.loops == 0)
+               if (!td->o.verify_only)
                        if (hdr->numberio != io_u->numberio) {
                                log_err("verify: bad header numberio %"PRIu16
                                        ", wanted %"PRIu16,
@@ -777,12 +870,14 @@ static int verify_header(struct io_u *io_u, struct thread_data *td,
        return 0;
 
 err:
-       log_err(" at file %s offset %llu, length %u\n",
+       log_err(" at file %s offset %llu, length %u"
+               " (requested block: offset=%llu, length=%llu)\n",
                io_u->file->file_name,
-               io_u->offset + hdr_num * hdr_len, hdr_len);
+               io_u->verify_offset + hdr_num * hdr_len, hdr_len,
+               io_u->verify_offset, io_u->buflen);
 
        if (td->o.verify_dump)
-               dump_buf(p, hdr_len, io_u->offset + hdr_num * hdr_len,
+               dump_buf(p, hdr_len, io_u->verify_offset + hdr_num * hdr_len,
                                "hdr_fail", io_u->file);
 
        return EILSEQ;
@@ -802,7 +897,7 @@ int verify_io_u(struct thread_data *td, struct io_u **io_u_ptr)
         * If the IO engine is faking IO (like null), then just pretend
         * we verified everything.
         */
-       if (td->io_ops->flags & FIO_FAKEIO)
+       if (td_ioengine_flagged(td, FIO_FAKEIO))
                return 0;
 
        if (io_u->flags & IO_U_F_TRIMMED) {
@@ -831,10 +926,11 @@ int verify_io_u(struct thread_data *td, struct io_u **io_u_ptr)
                hdr = p;
 
                /*
-                * Make rand_seed check pass when have verifysort or
-                * verify_backlog.
+                * Make the rand_seed check pass when verify_backlog is in use
+                * or a zone reset frequency is set for zonemode=zbd.
                 */
-               if (td->o.verifysort || (td->flags & TD_F_VER_BACKLOG))
+               if (!td_rw(td) || (td->flags & TD_F_VER_BACKLOG) ||
+                   td->o.zrf.u.f)
                        io_u->rand_seed = hdr->rand_seed;
 
                if (td->o.verify != VERIFY_PATTERN_NO_HDR) {
@@ -880,6 +976,18 @@ int verify_io_u(struct thread_data *td, struct io_u **io_u_ptr)
                case VERIFY_SHA512:
                        ret = verify_io_u_sha512(hdr, &vc);
                        break;
+               case VERIFY_SHA3_224:
+                       ret = verify_io_u_sha3_224(hdr, &vc);
+                       break;
+               case VERIFY_SHA3_256:
+                       ret = verify_io_u_sha3_256(hdr, &vc);
+                       break;
+               case VERIFY_SHA3_384:
+                       ret = verify_io_u_sha3_384(hdr, &vc);
+                       break;
+               case VERIFY_SHA3_512:
+                       ret = verify_io_u_sha3_512(hdr, &vc);
+                       break;
                case VERIFY_XXHASH:
                        ret = verify_io_u_xxhash(hdr, &vc);
                        break;
@@ -917,6 +1025,56 @@ static void fill_xxhash(struct verify_header *hdr, void *p, unsigned int len)
        vh->hash = XXH32_digest(state);
 }
 
+static void fill_sha3(struct fio_sha3_ctx *sha3_ctx, void *p, unsigned int len)
+{
+       fio_sha3_update(sha3_ctx, p, len);
+       fio_sha3_final(sha3_ctx);
+}
+
+static void fill_sha3_224(struct verify_header *hdr, void *p, unsigned int len)
+{
+       struct vhdr_sha3_224 *vh = hdr_priv(hdr);
+       struct fio_sha3_ctx sha3_ctx = {
+               .sha = vh->sha,
+       };
+
+       fio_sha3_224_init(&sha3_ctx);
+       fill_sha3(&sha3_ctx, p, len);
+}
+
+static void fill_sha3_256(struct verify_header *hdr, void *p, unsigned int len)
+{
+       struct vhdr_sha3_256 *vh = hdr_priv(hdr);
+       struct fio_sha3_ctx sha3_ctx = {
+               .sha = vh->sha,
+       };
+
+       fio_sha3_256_init(&sha3_ctx);
+       fill_sha3(&sha3_ctx, p, len);
+}
+
+static void fill_sha3_384(struct verify_header *hdr, void *p, unsigned int len)
+{
+       struct vhdr_sha3_384 *vh = hdr_priv(hdr);
+       struct fio_sha3_ctx sha3_ctx = {
+               .sha = vh->sha,
+       };
+
+       fio_sha3_384_init(&sha3_ctx);
+       fill_sha3(&sha3_ctx, p, len);
+}
+
+static void fill_sha3_512(struct verify_header *hdr, void *p, unsigned int len)
+{
+       struct vhdr_sha3_512 *vh = hdr_priv(hdr);
+       struct fio_sha3_ctx sha3_ctx = {
+               .sha = vh->sha,
+       };
+
+       fio_sha3_512_init(&sha3_ctx);
+       fill_sha3(&sha3_ctx, p, len);
+}
+
 static void fill_sha512(struct verify_header *hdr, void *p, unsigned int len)
 {
        struct vhdr_sha512 *vh = hdr_priv(hdr);
@@ -1009,9 +1167,9 @@ static void __fill_hdr(struct thread_data *td, struct io_u *io_u,
        hdr->verify_type = td->o.verify;
        hdr->len = header_len;
        hdr->rand_seed = rand_seed;
-       hdr->offset = io_u->offset + header_num * td->o.verify_interval;
+       hdr->offset = io_u->verify_offset + header_num * td->o.verify_interval;
        hdr->time_sec = io_u->start_time.tv_sec;
-       hdr->time_usec = io_u->start_time.tv_usec;
+       hdr->time_nsec = io_u->start_time.tv_nsec;
        hdr->thread = td->thread_number;
        hdr->numberio = io_u->numberio;
        hdr->crc32 = fio_crc32c(p, offsetof(struct verify_header, crc32));
@@ -1022,7 +1180,6 @@ static void fill_hdr(struct thread_data *td, struct io_u *io_u,
                     struct verify_header *hdr, unsigned int header_num,
                     unsigned int header_len, uint64_t rand_seed)
 {
-
        if (td->o.verify != VERIFY_PATTERN_NO_HDR)
                __fill_hdr(td, io_u, hdr, header_num, header_len, rand_seed);
 }
@@ -1032,12 +1189,17 @@ static void populate_hdr(struct thread_data *td, struct io_u *io_u,
                         unsigned int header_len)
 {
        unsigned int data_len;
-       void *data, *p;
+       void *data;
+       char *p;
 
-       p = (void *) hdr;
+       p = (char *) hdr;
 
        fill_hdr(td, io_u, hdr, header_num, header_len, io_u->rand_seed);
 
+       if (header_len <= hdr_size(td, hdr)) {
+               td_verror(td, EINVAL, "Blocksize too small");
+               return;
+       }
        data_len = header_len - hdr_size(td, hdr);
 
        data = p + hdr_size(td, hdr);
@@ -1083,6 +1245,26 @@ static void populate_hdr(struct thread_data *td, struct io_u *io_u,
                                                io_u, hdr->len);
                fill_sha512(hdr, data, data_len);
                break;
+       case VERIFY_SHA3_224:
+               dprint(FD_VERIFY, "fill sha3-224 io_u %p, len %u\n",
+                                               io_u, hdr->len);
+               fill_sha3_224(hdr, data, data_len);
+               break;
+       case VERIFY_SHA3_256:
+               dprint(FD_VERIFY, "fill sha3-256 io_u %p, len %u\n",
+                                               io_u, hdr->len);
+               fill_sha3_256(hdr, data, data_len);
+               break;
+       case VERIFY_SHA3_384:
+               dprint(FD_VERIFY, "fill sha3-384 io_u %p, len %u\n",
+                                               io_u, hdr->len);
+               fill_sha3_384(hdr, data, data_len);
+               break;
+       case VERIFY_SHA3_512:
+               dprint(FD_VERIFY, "fill sha3-512 io_u %p, len %u\n",
+                                               io_u, hdr->len);
+               fill_sha3_512(hdr, data, data_len);
+               break;
        case VERIFY_XXHASH:
                dprint(FD_VERIFY, "fill xxhash io_u %p, len %u\n",
                                                io_u, hdr->len);
@@ -1116,8 +1298,6 @@ void populate_verify_io_u(struct thread_data *td, struct io_u *io_u)
        if (td->o.verify == VERIFY_NULL)
                return;
 
-       io_u->numberio = td->io_issues[io_u->ddir];
-
        fill_pattern_headers(td, io_u, 0, 0);
 }
 
@@ -1132,15 +1312,14 @@ int get_next_verify(struct thread_data *td, struct io_u *io_u)
                return 0;
 
        if (!RB_EMPTY_ROOT(&td->io_hist_tree)) {
-               struct rb_node *n = rb_first(&td->io_hist_tree);
+               struct fio_rb_node *n = rb_first(&td->io_hist_tree);
 
                ipo = rb_entry(n, struct io_piece, rb_node);
 
                /*
                 * Ensure that the associated IO has completed
                 */
-               read_barrier();
-               if (ipo->flags & IP_F_IN_FLIGHT)
+               if (atomic_load_acquire(&ipo->flags) & IP_F_IN_FLIGHT)
                        goto nothing;
 
                rb_erase(n, &td->io_hist_tree);
@@ -1152,8 +1331,7 @@ int get_next_verify(struct thread_data *td, struct io_u *io_u)
                /*
                 * Ensure that the associated IO has completed
                 */
-               read_barrier();
-               if (ipo->flags & IP_F_IN_FLIGHT)
+               if (atomic_load_acquire(&ipo->flags) & IP_F_IN_FLIGHT)
                        goto nothing;
 
                flist_del(&ipo->list);
@@ -1165,13 +1343,14 @@ int get_next_verify(struct thread_data *td, struct io_u *io_u)
                td->io_hist_len--;
 
                io_u->offset = ipo->offset;
+               io_u->verify_offset = ipo->offset;
                io_u->buflen = ipo->len;
                io_u->numberio = ipo->numberio;
                io_u->file = ipo->file;
-               io_u_set(io_u, IO_U_F_VER_LIST);
+               io_u_set(td, io_u, IO_U_F_VER_LIST);
 
                if (ipo->flags & IP_F_TRIMMED)
-                       io_u_set(io_u, IO_U_F_TRIMMED);
+                       io_u_set(td, io_u, IO_U_F_TRIMMED);
 
                if (!fio_file_open(io_u->file)) {
                        int r = td_io_open_file(td, io_u->file);
@@ -1210,6 +1389,7 @@ void fio_verify_init(struct thread_data *td)
 {
        if (td->o.verify == VERIFY_CRC32C_INTEL ||
            td->o.verify == VERIFY_CRC32C) {
+               crc32c_arm64_probe();
                crc32c_intel_probe();
        }
 }
@@ -1240,7 +1420,6 @@ static void *verify_async_thread(void *data)
                        ret = pthread_cond_wait(&td->verify_cond,
                                                        &td->io_u_lock);
                        if (ret) {
-                               pthread_mutex_unlock(&td->io_u_lock);
                                break;
                        }
                }
@@ -1255,7 +1434,7 @@ static void *verify_async_thread(void *data)
                        io_u = flist_first_entry(&list, struct io_u, verify_list);
                        flist_del_init(&io_u->verify_list);
 
-                       io_u_set(io_u, IO_U_F_NO_FILE_PUT);
+                       io_u_set(td, io_u, IO_U_F_NO_FILE_PUT);
                        ret = verify_io_u(td, &io_u);
 
                        put_io_u(td, io_u);
@@ -1278,9 +1457,9 @@ static void *verify_async_thread(void *data)
 done:
        pthread_mutex_lock(&td->io_u_lock);
        td->nr_verify_threads--;
+       pthread_cond_signal(&td->free_cond);
        pthread_mutex_unlock(&td->io_u_lock);
 
-       pthread_cond_signal(&td->free_cond);
        return NULL;
 }
 
@@ -1290,7 +1469,7 @@ int verify_async_init(struct thread_data *td)
        pthread_attr_t attr;
 
        pthread_attr_init(&attr);
-       pthread_attr_setstacksize(&attr, PTHREAD_STACK_MIN);
+       pthread_attr_setstacksize(&attr, 2 * PTHREAD_STACK_MIN);
 
        td->verify_thread_exit = 0;
 
@@ -1316,9 +1495,12 @@ int verify_async_init(struct thread_data *td)
 
        if (i != td->o.verify_async) {
                log_err("fio: only %d verify threads started, exiting\n", i);
+
+               pthread_mutex_lock(&td->io_u_lock);
                td->verify_thread_exit = 1;
-               write_barrier();
                pthread_cond_broadcast(&td->verify_cond);
+               pthread_mutex_unlock(&td->io_u_lock);
+
                return 1;
        }
 
@@ -1327,12 +1509,10 @@ int verify_async_init(struct thread_data *td)
 
 void verify_async_exit(struct thread_data *td)
 {
+       pthread_mutex_lock(&td->io_u_lock);
        td->verify_thread_exit = 1;
-       write_barrier();
        pthread_cond_broadcast(&td->verify_cond);
 
-       pthread_mutex_lock(&td->io_u_lock);
-
        while (td->nr_verify_threads)
                pthread_cond_wait(&td->free_cond, &td->io_u_lock);
 
@@ -1346,7 +1526,7 @@ int paste_blockoff(char *buf, unsigned int len, void *priv)
        struct io_u *io = priv;
        unsigned long long off;
 
-       typecheck(typeof(off), io->offset);
+       typecheck(__typeof__(off), io->offset);
        off = cpu_to_le64((uint64_t)io->offset);
        len = min(len, (unsigned int)sizeof(off));
        memcpy(buf, &off, len);
@@ -1397,10 +1577,9 @@ static int fill_file_completions(struct thread_data *td,
 struct all_io_list *get_all_io_list(int save_mask, size_t *sz)
 {
        struct all_io_list *rep;
-       struct thread_data *td;
        size_t depth;
        void *next;
-       int i, nr;
+       int nr;
 
        compiletime_assert(sizeof(struct all_io_list) == 8, "all_io_list");
 
@@ -1410,14 +1589,14 @@ struct all_io_list *get_all_io_list(int save_mask, size_t *sz)
         */
        depth = 0;
        nr = 0;
-       for_each_td(td, i) {
-               if (save_mask != IO_LIST_ALL && (i + 1) != save_mask)
+       for_each_td(td) {
+               if (save_mask != IO_LIST_ALL && (__td_index + 1) != save_mask)
                        continue;
                td->stop_io = 1;
                td->flags |= TD_F_VSTATE_SAVED;
                depth += (td->o.iodepth * td->o.nr_files);
                nr++;
-       }
+       } end_for_each();
 
        if (!nr)
                return NULL;
@@ -1425,26 +1604,25 @@ struct all_io_list *get_all_io_list(int save_mask, size_t *sz)
        *sz = sizeof(*rep);
        *sz += nr * sizeof(struct thread_io_list);
        *sz += depth * sizeof(struct file_comp);
-       rep = malloc(*sz);
-       memset(rep, 0, *sz);
+       rep = calloc(1, *sz);
 
        rep->threads = cpu_to_le64((uint64_t) nr);
 
        next = &rep->state[0];
-       for_each_td(td, i) {
+       for_each_td(td) {
                struct thread_io_list *s = next;
                unsigned int comps, index = 0;
 
-               if (save_mask != IO_LIST_ALL && (i + 1) != save_mask)
+               if (save_mask != IO_LIST_ALL && (__td_index + 1) != save_mask)
                        continue;
 
                comps = fill_file_completions(td, s, &index);
 
                s->no_comps = cpu_to_le64((uint64_t) comps);
-               s->depth = cpu_to_le64((uint64_t) td->o.iodepth);
-               s->nofiles = cpu_to_le64((uint64_t) td->o.nr_files);
+               s->depth = cpu_to_le32((uint32_t) td->o.iodepth);
+               s->nofiles = cpu_to_le32((uint32_t) td->o.nr_files);
                s->numberio = cpu_to_le64((uint64_t) td->io_issues[DDIR_WRITE]);
-               s->index = cpu_to_le64((uint64_t) i);
+               s->index = cpu_to_le64((uint64_t) __td_index);
                if (td->random_state.use64) {
                        s->rand.state64.s[0] = cpu_to_le64(td->random_state.state64.s1);
                        s->rand.state64.s[1] = cpu_to_le64(td->random_state.state64.s2);
@@ -1460,10 +1638,9 @@ struct all_io_list *get_all_io_list(int save_mask, size_t *sz)
                        s->rand.state32.s[3] = 0;
                        s->rand.use64 = 0;
                }
-               s->name[sizeof(s->name) - 1] = '\0';
-               strncpy((char *) s->name, td->o.name, sizeof(s->name) - 1);
+               snprintf((char *) s->name, sizeof(s->name), "%s", td->o.name);
                next = io_list_next(s);
-       }
+       } end_for_each();
 
        return rep;
 }
@@ -1480,6 +1657,10 @@ static int open_state_file(const char *name, const char *prefix, int num,
        else
                flags = O_RDONLY;
 
+#ifdef _WIN32
+       flags |= O_BINARY;
+#endif
+
        verify_state_gen_name(out, sizeof(out), name, prefix, num);
 
        fd = open(out, flags, 0644);
@@ -1548,7 +1729,7 @@ void verify_save_state(int mask)
                char prefix[PATH_MAX];
 
                if (aux_path)
-                       sprintf(prefix, "%s%slocal", aux_path, FIO_OS_PATH_SEPARATOR);
+                       sprintf(prefix, "%s%clocal", aux_path, FIO_OS_PATH_SEPARATOR);
                else
                        strcpy(prefix, "local");
 
@@ -1696,7 +1877,7 @@ int verify_state_should_stop(struct thread_data *td, struct io_u *io_u)
        for (i = 0; i < s->no_comps; i++) {
                if (s->comps[i].fileno != f->fileno)
                        continue;
-               if (io_u->offset == s->comps[i].offset)
+               if (io_u->verify_offset == s->comps[i].offset)
                        return 0;
        }