X-Git-Url: https://git.kernel.dk/?p=fio.git;a=blobdiff_plain;f=io_u.c;h=404c75b382ab5e187a23ea33134092e05b40f2a8;hp=4246edff0b2ffe19242dace14d4271ba851c7856;hb=2ea93f982e728343f823c2cf63b4674a104575bf;hpb=0bcf41cdc22dfee6b3f3b2ba9a533b4b103c70c2

diff --git a/io_u.c b/io_u.c
index 4246edff..404c75b3 100644
--- a/io_u.c
+++ b/io_u.c
@@ -157,6 +157,66 @@ static int __get_next_rand_offset_gauss(struct thread_data *td,
 	return 0;
 }
 
+/*
+ * zoned_abs offset generation: store the next random block in *b. Returns
+ * 1 on error, else 0 or whatever __get_next_rand_offset() hands back.
+ */
+static int __get_next_rand_offset_zoned_abs(struct thread_data *td,
+					    struct fio_file *f,
+					    enum fio_ddir ddir, uint64_t *b)
+{
+	uint64_t lastb, zone_start, zone_end;
+	struct zone_split_index *zsi;
+	static int warned;
+	unsigned int pct;
+
+	lastb = last_block(td, f, ddir);
+	if (!lastb)
+		return 1;
+
+	/* No zone split for this direction, use the plain generator */
+	if (!td->o.zone_split_nr[ddir])
+		return __get_next_rand_offset(td, f, ddir, b, lastb);
+
+	/*
+	 * Draw a value between 1 and 100, both inclusive, and use it to
+	 * pick an entry from the index table for this direction.
+	 */
+	pct = rand32_between(&td->zone_state, 1, 100);
+	zsi = &td->zone_state_index[ddir][pct - 1];
+
+	/* In blocks: where this zone starts and where it ends */
+	zone_start = zsi->size_prev / td->o.ba[ddir];
+	zone_end = zsi->size / td->o.ba[ddir];
+
+	if (zone_end == -1U) {
+		/* Should never happen: warn once, use the plain generator */
+		if (!warned) {
+			log_err("fio: bug in zoned generation\n");
+			warned = 1;
+		}
+		return __get_next_rand_offset(td, f, ddir, b, lastb);
+	}
+
+	/*
+	 * The user specified ranges that exceed the file/device size.
+	 * We can't handle that gracefully, so error and exit.
+	 */
+	if (zone_end > lastb) {
+		log_err("fio: zoned_abs sizes exceed file size\n");
+		return 1;
+	}
+
+	/*
+	 * Generate an index from 0..zone_end-zone_start, shift into the zone
+	 */
+	if (__get_next_rand_offset(td, f, ddir, b, zone_end - zone_start) == 1)
+		return 1;
+
+	*b += zone_start;
+	return 0;
+}
+
 static int __get_next_rand_offset_zoned(struct thread_data *td,
 					struct fio_file *f, enum fio_ddir ddir,
 					uint64_t *b)
@@ -249,6 +309,8 @@ static int get_off_from_method(struct thread_data *td, struct fio_file *f,
 		return __get_next_rand_offset_gauss(td, f, ddir, b);
 	else if (td->o.random_distribution == FIO_RAND_DIST_ZONED)
 		return __get_next_rand_offset_zoned(td, f, ddir, b);
+	else if (td->o.random_distribution == FIO_RAND_DIST_ZONED_ABS)
+		return __get_next_rand_offset_zoned_abs(td, f, ddir, b);
 
 	log_err("fio: unknown random distribution: %d\n", td->o.random_distribution);
 	return 1;
@@ -361,16 +423,13 @@ static int get_next_seq_offset(struct thread_data *td, struct fio_file *f,
 
 	assert(ddir_rw(ddir));
 
+	/*
+	 * If we reach the end for a time based run, reset us back to 0
+	 * and invalidate the cache, if we need to.
+	 */
 	if (f->last_pos[ddir] >= f->io_size + get_start_offset(td, f) &&
 	    o->time_based) {
-		struct thread_options *o = &td->o;
-		uint64_t io_size = f->io_size + (f->io_size % o->min_bs[ddir]);
-
-		if (io_size > f->last_pos[ddir])
-			f->last_pos[ddir] = 0;
-		else
-			f->last_pos[ddir] = f->last_pos[ddir] - io_size;
-
+		f->last_pos[ddir] = f->file_offset;
 		loop_cache_invalidate(td, f);
 	}
 
@@ -700,11 +759,11 @@ static enum fio_ddir rate_ddir(struct thread_data *td, enum fio_ddir ddir)
 			return odir;
 
 		/*
-		 * Both directions are ahead of rate. sleep the min
-		 * switch if necissary
+		 * Both directions are ahead of rate. sleep the min,
+		 * switch if necessary
 		 */
 		if (td->rate_next_io_time[ddir] <=
-			td->rate_next_io_time[odir]) {
+		    td->rate_next_io_time[odir]) {
 			usec = td->rate_next_io_time[ddir] - now;
 		} else {
 			usec = td->rate_next_io_time[odir] - now;
@@ -716,8 +775,7 @@ static enum fio_ddir rate_ddir(struct thread_data *td, enum fio_ddir ddir)
 	if (td->o.io_submit_mode == IO_MODE_INLINE)
 		io_u_quiesce(td);
 
-	usec = usec_sleep(td, usec);
-
+	usec_sleep(td, usec);
 	return ddir;
 }
 
@@ -864,6 +922,45 @@ void requeue_io_u(struct thread_data *td, struct io_u **io_u)
 	*io_u = NULL;
 }
 
+/*
+ * Zone bookkeeping for io_u's file: switch to the next zone when due and
+ * keep last_pos inside it. zone_range must be non-zero (used as a divisor).
+ */
+static void __fill_io_u_zone(struct thread_data *td, struct io_u *io_u)
+{
+	struct fio_file *f = io_u->file;
+
+	/* See if it's time to switch to a new zone */
+	if (td->zone_bytes >= td->o.zone_size && td->o.zone_skip) {
+		td->zone_bytes = 0;
+		f->file_offset += td->o.zone_range + td->o.zone_skip;
+		/*
+		 * Wrap to the start offset, if we exceed the file size
+		 */
+		if (f->file_offset >= f->real_file_size)
+			f->file_offset = get_start_offset(td, f);
+		f->last_pos[io_u->ddir] = f->file_offset;
+		td->io_skip_bytes += td->o.zone_skip;
+	}
+
+	/*
+	 * If zone_size > zone_range, then maintain the same zone until
+	 * zone_bytes >= zone_size.
+	 */
+	if (f->last_pos[io_u->ddir] >= (f->file_offset + td->o.zone_range)) {
+		dprint(FD_IO, "io_u maintain zone offset=%" PRIu64 "/last_pos=%" PRIu64 "\n",
+				f->file_offset, f->last_pos[io_u->ddir]);
+		f->last_pos[io_u->ddir] = f->file_offset;
+	}
+
+	/*
+	 * For random: if 'norandommap' is not set and zone_size > zone_range,
+	 * map needs to be reset as it's done with zone_range every time.
+	 */
+	if ((td->zone_bytes % td->o.zone_range) == 0)
+		fio_file_reset(td, f);
+}
+
 static int fill_io_u(struct thread_data *td, struct io_u *io_u)
 {
 	unsigned int is_random;
@@ -880,21 +977,10 @@ static int fill_io_u(struct thread_data *td, struct io_u *io_u)
 		goto out;
 
 	/*
-	 * See if it's time to switch to a new zone
+	 * When the file is zoned, zone_range is always positive
 	 */
-	if (td->zone_bytes >= td->o.zone_size && td->o.zone_skip) {
-		struct fio_file *f = io_u->file;
-
-		td->zone_bytes = 0;
-		f->file_offset += td->o.zone_range + td->o.zone_skip;
-
-		/*
-		 * Wrap from the beginning, if we exceed the file size
-		 */
-		if (f->file_offset >= f->real_file_size)
-			f->file_offset = f->real_file_size - f->file_offset;
-		f->last_pos[io_u->ddir] = f->file_offset;
-		td->io_skip_bytes += td->o.zone_skip;
+	if (td->o.zone_range) {
+		__fill_io_u_zone(td, io_u);
 	}
 
 	/*
@@ -913,9 +999,8 @@ static int fill_io_u(struct thread_data *td, struct io_u *io_u)
 	}
 
 	if (io_u->offset + io_u->buflen > io_u->file->real_file_size) {
-		dprint(FD_IO, "io_u %p, offset + buflen exceeds file size\n",
-			io_u);
-		dprint(FD_IO, "  offset=%llu/buflen=%lu > %llu\n",
+		dprint(FD_IO, "io_u %p, off=0x%llx + len=0x%lx exceeds file size=0x%llx\n",
+			io_u,
 			(unsigned long long) io_u->offset, io_u->buflen,
 			(unsigned long long) io_u->file->real_file_size);
 		return 1;
@@ -928,7 +1013,7 @@ static int fill_io_u(struct thread_data *td, struct io_u *io_u)
 		mark_random_map(td, io_u);
 
 out:
-	dprint_io_u(io_u, "fill_io_u");
+	dprint_io_u(io_u, "fill");
 	td->zone_bytes += io_u->buflen;
 	return 0;
 }
@@ -1350,10 +1435,10 @@ static long set_io_u_file(struct thread_data *td, struct io_u *io_u)
 }
 
 static void lat_fatal(struct thread_data *td, struct io_completion_data *icd,
-		      unsigned long tusec, unsigned long max_usec)
+		      unsigned long long tnsec, unsigned long long max_nsec)
 {
 	if (!td->error)
-		log_err("fio: latency of %lu usec exceeds specified max (%lu usec)\n", tusec, max_usec);
+		log_err("fio: latency of %llu nsec exceeds specified max (%llu nsec)\n", tnsec, max_nsec);
 	td_verror(td, ETIMEDOUT, "max latency exceeded");
 	icd->error = ETIMEDOUT;
 }
@@ -1556,22 +1641,19 @@ static bool check_get_trim(struct thread_data *td, struct io_u *io_u)
 {
 	if (!(td->flags & TD_F_TRIM_BACKLOG))
 		return false;
+	if (!td->trim_entries)
+		return false;
 
-	if (td->trim_entries) {
-		int get_trim = 0;
-
-		if (td->trim_batch) {
-			td->trim_batch--;
-			get_trim = 1;
-		} else if (!(td->io_hist_len % td->o.trim_backlog) &&
-			 td->last_ddir != DDIR_READ) {
-			td->trim_batch = td->o.trim_batch;
-			if (!td->trim_batch)
-				td->trim_batch = td->o.trim_backlog;
-			get_trim = 1;
-		}
-
-		if (get_trim && get_next_trim(td, io_u))
+	if (td->trim_batch) {
+		td->trim_batch--;
+		if (get_next_trim(td, io_u))
+			return true;
+	} else if (!(td->io_hist_len % td->o.trim_backlog) &&
+		     td->last_ddir != DDIR_READ) {
+		td->trim_batch = td->o.trim_batch;
+		if (!td->trim_batch)
+			td->trim_batch = td->o.trim_backlog;
+		if (get_next_trim(td, io_u))
 			return true;
 	}
 
@@ -1613,32 +1695,40 @@ static bool check_get_verify(struct thread_data *td, struct io_u *io_u)
  */
 static void small_content_scramble(struct io_u *io_u)
 {
-	unsigned int i, nr_blocks = io_u->buflen / 512;
-	uint64_t boffset;
+	unsigned int i, nr_blocks = io_u->buflen >> 9;
 	unsigned int offset;
-	char *p, *end;
+	uint64_t boffset, *iptr;
+	char *p;
 
 	if (!nr_blocks)
 		return;
 
 	p = io_u->xfer_buf;
 	boffset = io_u->offset;
-	io_u->buf_filled_len = 0;
+
+	if (io_u->buf_filled_len)
+		io_u->buf_filled_len = 0;
+
+	/*
+	 * Generate random index between 0..7. We do chunks of 512b, if
+	 * we assume a cacheline is 64 bytes, then we have 8 of those.
+	 * Scramble content within the blocks in the same cacheline to
+	 * speed things up.
+	 */
+	offset = (io_u->start_time.tv_nsec ^ boffset) & 7;
 
 	for (i = 0; i < nr_blocks; i++) {
 		/*
-		 * Fill the byte offset into a "random" start offset of
-		 * the buffer, given by the product of the usec time
-		 * and the actual offset.
+		 * Fill offset into start of cacheline, time into end
+		 * of cacheline
 		 */
-		offset = ((io_u->start_time.tv_nsec/1000) ^ boffset) & 511;
-		offset &= ~(sizeof(uint64_t) - 1);
-		if (offset >= 512 - sizeof(uint64_t))
-			offset -= sizeof(uint64_t);
-		memcpy(p + offset, &boffset, sizeof(boffset));
-
-		end = p + 512 - sizeof(io_u->start_time);
-		memcpy(end, &io_u->start_time, sizeof(io_u->start_time));
+		iptr = (void *) p + (offset << 6);
+		*iptr = boffset;
+
+		iptr = (void *) p + 64 - 2 * sizeof(uint64_t);
+		iptr[0] = io_u->start_time.tv_sec;
+		iptr[1] = io_u->start_time.tv_nsec;
+
 		p += 512;
 		boffset += 512;
 	}
@@ -1809,14 +1899,14 @@ static void account_io_completion(struct thread_data *td, struct io_u *io_u,
 			struct prof_io_ops *ops = &td->prof_io_ops;
 
 			if (ops->io_u_lat)
-				icd->error = ops->io_u_lat(td, tnsec/1000);
+				icd->error = ops->io_u_lat(td, tnsec);
 		}
 
-		if (td->o.max_latency && tnsec/1000 > td->o.max_latency)
-			lat_fatal(td, icd, tnsec/1000, td->o.max_latency);
-		if (td->o.latency_target && tnsec/1000 > td->o.latency_target) {
+		if (td->o.max_latency && tnsec > td->o.max_latency)
+			lat_fatal(td, icd, tnsec, td->o.max_latency);
+		if (td->o.latency_target && tnsec > td->o.latency_target) {
 			if (lat_target_failed(td))
-				lat_fatal(td, icd, tnsec/1000, td->o.latency_target);
+				lat_fatal(td, icd, tnsec, td->o.latency_target);
 		}
 	}
 
@@ -1831,7 +1921,8 @@ static void account_io_completion(struct thread_data *td, struct io_u *io_u,
 
 		if (no_reduce && per_unit_log(td->iops_log))
 			add_iops_sample(td, io_u, bytes);
-	}
+	} else if (ddir_sync(idx) && !td->o.disable_clat)
+		add_sync_clat_sample(&td->ts, llnsec);
 
 	if (td->ts.nr_block_infos && io_u->ddir == DDIR_TRIM) {
 		uint32_t *info = io_u_block_info(td, io_u);
@@ -1869,6 +1960,12 @@ static void file_log_write_comp(const struct thread_data *td, struct fio_file *f
 		f->last_write_idx = 0;
 }
 
+static bool should_account(struct thread_data *td)
+{
+	/* Ramp time must be over, and the thread either running or verifying */
+	return ramp_time_over(td) && (td->runstate == TD_RUNNING || td->runstate == TD_VERIFYING);
+}
+
 static void io_completed(struct thread_data *td, struct io_u **io_u_ptr,
 			 struct io_completion_data *icd)
 {
@@ -1876,7 +1973,7 @@ static void io_completed(struct thread_data *td, struct io_u **io_u_ptr,
 	enum fio_ddir ddir = io_u->ddir;
 	struct fio_file *f = io_u->file;
 
-	dprint_io_u(io_u, "io complete");
+	dprint_io_u(io_u, "complete");
 
 	assert(io_u->flags & IO_U_F_FLIGHT);
 	io_u_clear(td, io_u, IO_U_F_FLIGHT | IO_U_F_BUSY_OK);
@@ -1897,15 +1994,17 @@ static void io_completed(struct thread_data *td, struct io_u **io_u_ptr,
 	}
 
 	if (ddir_sync(ddir)) {
-		td->last_was_sync = 1;
+		td->last_was_sync = true;
 		if (f) {
 			f->first_write = -1ULL;
 			f->last_write = -1ULL;
 		}
+		if (should_account(td))
+			account_io_completion(td, io_u, icd, ddir, io_u->buflen);
 		return;
 	}
 
-	td->last_was_sync = 0;
+	td->last_was_sync = false;
 	td->last_ddir = ddir;
 
 	if (!io_u->error && ddir_rw(ddir)) {
@@ -1913,17 +2012,17 @@ static void io_completed(struct thread_data *td, struct io_u **io_u_ptr,
 		int ret;
 
 		td->io_blocks[ddir]++;
-		td->this_io_blocks[ddir]++;
 		td->io_bytes[ddir] += bytes;
 
-		if (!(io_u->flags & IO_U_F_VER_LIST))
+		if (!(io_u->flags & IO_U_F_VER_LIST)) {
+			td->this_io_blocks[ddir]++;
 			td->this_io_bytes[ddir] += bytes;
+		}
 
 		if (ddir == DDIR_WRITE)
 			file_log_write_comp(td, f, io_u->offset, bytes);
 
-		if (ramp_time_over(td) && (td->runstate == TD_RUNNING ||
-					   td->runstate == TD_VERIFYING))
+		if (should_account(td))
 			account_io_completion(td, io_u, icd, ddir, bytes);
 
 		icd->bytes_done[ddir] += bytes;