X-Git-Url: https://git.kernel.dk/?p=fio.git;a=blobdiff_plain;f=rate-submit.c;h=752c30a5f171d6f0e541ef79ce9fc5c341195d58;hp=3d35815212f5e1ca67b1b79990d78abf90579241;hb=HEAD;hpb=51575029ff4027f42ef6be374fd50b2cda5880b4

diff --git a/rate-submit.c b/rate-submit.c
index 3d358152..92be3df7 100644
--- a/rate-submit.c
+++ b/rate-submit.c
@@ -4,21 +4,76 @@
  * Copyright (C) 2015 Jens Axboe
  *
  */
+#include
+#include
+#include
+
 #include "fio.h"
-#include "ioengine.h"
+#include "ioengines.h"
 #include "lib/getrusage.h"
+#include "rate-submit.h"
+
+static void check_overlap(struct io_u *io_u)
+{
+	int res;
+
+	/*
+	 * Allow only one thread to check for overlap at a time to prevent two
+	 * threads from thinking the coast is clear and then submitting IOs
+	 * that overlap with each other.
+	 *
+	 * If an overlap is found, release the lock and re-acquire it before
+	 * checking again to give other threads a chance to make progress.
+	 *
+	 * If no overlap is found, release the lock when the io_u's
+	 * IO_U_F_FLIGHT flag is set so that this io_u can be checked by other
+	 * threads as they assess overlap.
+	 */
+	res = pthread_mutex_lock(&overlap_check);
+	if (fio_unlikely(res != 0)) {
+		log_err("failed to lock overlap check mutex, err: %i:%s", errno, strerror(errno));
+		abort();
+	}
+
+retry:
+	for_each_td(td) {
+		if (td->runstate <= TD_SETTING_UP ||
+		    td->runstate >= TD_FINISHING ||
+		    !td->o.serialize_overlap ||
+		    td->o.io_submit_mode != IO_MODE_OFFLOAD)
+			continue;
+
+		if (!in_flight_overlap(&td->io_u_all, io_u))
+			continue;
+
+		res = pthread_mutex_unlock(&overlap_check);
+		if (fio_unlikely(res != 0)) {
+			log_err("failed to unlock overlap check mutex, err: %i:%s", errno, strerror(errno));
+			abort();
+		}
+		res = pthread_mutex_lock(&overlap_check);
+		if (fio_unlikely(res != 0)) {
+			log_err("failed to lock overlap check mutex, err: %i:%s", errno, strerror(errno));
+			abort();
+		}
+		goto retry;
+	} end_for_each();
+}
 
 static int io_workqueue_fn(struct submit_worker *sw,
 			   struct workqueue_work *work)
 {
 	struct io_u *io_u = container_of(work, struct io_u, work);
 	const enum fio_ddir ddir = io_u->ddir;
-	struct thread_data *td = sw->private;
-	int ret;
+	struct thread_data *td = sw->priv;
+	int ret, error;
+
+	if (td->o.serialize_overlap)
+		check_overlap(io_u);
 
 	dprint(FD_RATE, "io_u %p queued by %u\n", io_u, gettid());
 
-	io_u_set(io_u, IO_U_F_NO_FILE_PUT);
+	io_u_set(td, io_u, IO_U_F_NO_FILE_PUT);
 
 	td->cur_depth++;
 
@@ -29,12 +84,14 @@ static int io_workqueue_fn(struct submit_worker *sw,
 		ret = io_u_queued_complete(td, 1);
 		if (ret > 0)
 			td->cur_depth -= ret;
-		io_u_clear(io_u, IO_U_F_FLIGHT);
+		else if (ret < 0)
+			break;
+		io_u_clear(td, io_u, IO_U_F_FLIGHT);
 	} while (1);
 
 	dprint(FD_RATE, "io_u %p ret %d by %u\n", io_u, ret, gettid());
 
-	io_queue_event(td, io_u, &ret, ddir, NULL, 0, NULL);
+	error = io_queue_event(td, io_u, &ret, ddir, NULL, 0, NULL);
 
 	if (ret == FIO_Q_COMPLETED)
 		td->cur_depth--;
@@ -49,10 +106,12 @@ static int io_workqueue_fn(struct submit_worker *sw,
 		ret = io_u_queued_complete(td, min_evts);
 		if (ret > 0)
 			td->cur_depth -= ret;
-	} else if (ret == FIO_Q_BUSY) {
-		ret = io_u_queued_complete(td, td->cur_depth);
-		if (ret > 0)
-			td->cur_depth -= ret;
+	}
+
+	if (error || td->error) {
+		pthread_mutex_lock(&td->io_u_lock);
+		pthread_cond_signal(&td->parent->free_cond);
+		pthread_mutex_unlock(&td->io_u_lock);
 	}
 
 	return 0;
@@ -60,8 +119,10 @@ static int io_workqueue_fn(struct submit_worker *sw,
 
 static bool io_workqueue_pre_sleep_flush_fn(struct submit_worker *sw)
 {
-	struct thread_data *td = sw->private;
+	struct thread_data *td = sw->priv;
 
+	if (td->error)
+		return false;
 	if (td->io_u_queued || td->cur_depth || td->io_u_in_flight)
 		return true;
 
@@ -70,7 +131,7 @@ static bool io_workqueue_pre_sleep_flush_fn(struct submit_worker *sw)
 
 static void io_workqueue_pre_sleep_fn(struct submit_worker *sw)
 {
-	struct thread_data *td = sw->private;
+	struct thread_data *td = sw->priv;
 	int ret;
 
 	ret = io_u_quiesce(td);
@@ -83,21 +144,20 @@ static int io_workqueue_alloc_fn(struct submit_worker *sw)
 	struct thread_data *td;
 
 	td = calloc(1, sizeof(*td));
-	sw->private = td;
+	sw->priv = td;
 	return 0;
 }
 
 static void io_workqueue_free_fn(struct submit_worker *sw)
 {
-	free(sw->private);
-	sw->private = NULL;
+	free(sw->priv);
+	sw->priv = NULL;
 }
 
 static int io_workqueue_init_worker_fn(struct submit_worker *sw)
 {
 	struct thread_data *parent = sw->wq->td;
-	struct thread_data *td = sw->private;
-	int fio_unused ret;
+	struct thread_data *td = sw->priv;
 
 	memcpy(&td->o, &parent->o, sizeof(td->o));
 	memcpy(&td->ts, &parent->ts, sizeof(td->ts));
@@ -105,32 +165,32 @@ static int io_workqueue_init_worker_fn(struct submit_worker *sw)
 	dup_files(td, parent);
 	td->eo = parent->eo;
 	fio_options_mem_dupe(td);
+	td->iolog_f = parent->iolog_f;
 
 	if (ioengine_load(td))
 		goto err;
 
-	if (td->o.odirect)
-		td->io_ops->flags |= FIO_RAWIO;
-
 	td->pid = gettid();
 
 	INIT_FLIST_HEAD(&td->io_log_list);
 	INIT_FLIST_HEAD(&td->io_hist_list);
 	INIT_FLIST_HEAD(&td->verify_list);
 	INIT_FLIST_HEAD(&td->trim_list);
-	INIT_FLIST_HEAD(&td->next_rand_list);
 	td->io_hist_tree = RB_ROOT;
 
 	td->o.iodepth = 1;
 	if (td_io_init(td))
 		goto err_io_init;
 
-	fio_gettime(&td->epoch, NULL);
+	if (td->io_ops->post_init && td->io_ops->post_init(td))
+		goto err_io_init;
+
+	set_epoch_time(td, td->o.log_alternate_epoch_clock_id, td->o.job_start_clock_id);
 	fio_getrusage(&td->ru_start);
 	clear_io_state(td, 1);
 
 	td_set_runstate(td, TD_RUNNING);
-	td->flags |= TD_F_CHILD;
+	td->flags |= TD_F_CHILD | TD_F_NEED_LOCK;
 	td->parent = parent;
 	return 0;
 
@@ -144,10 +204,19 @@ err:
 static void io_workqueue_exit_worker_fn(struct submit_worker *sw,
 					unsigned int *sum_cnt)
 {
-	struct thread_data *td = sw->private;
+	struct thread_data *td = sw->priv;
 
 	(*sum_cnt)++;
-	sum_thread_stats(&sw->wq->td->ts, &td->ts, *sum_cnt == 1);
+
+	/*
+	 * io_workqueue_update_acct_fn() doesn't support per prio stats, and
+	 * even if it did, offload can't be used with all async IO engines.
+	 * If group reporting is set in the parent td, the group result
+	 * generated by __show_run_stats() can still contain multiple prios
+	 * from different offloaded jobs.
+	 */
+	sw->wq->td->ts.disable_prio_stat = 1;
+	sum_thread_stats(&sw->wq->td->ts, &td->ts);
 
 	fio_options_free(td);
 	close_and_free_files(td);
@@ -206,13 +275,15 @@ static void sum_ddir(struct thread_data *dst, struct thread_data *src,
 	sum_val(&dst->this_io_blocks[ddir], &src->this_io_blocks[ddir]);
 	sum_val(&dst->this_io_bytes[ddir], &src->this_io_bytes[ddir]);
 	sum_val(&dst->bytes_done[ddir], &src->bytes_done[ddir]);
+	if (ddir == DDIR_READ)
+		sum_val(&dst->bytes_verified, &src->bytes_verified);
 
 	pthread_double_unlock(&dst->io_wq.stat_lock, &src->io_wq.stat_lock);
 }
 
 static void io_workqueue_update_acct_fn(struct submit_worker *sw)
 {
-	struct thread_data *src = sw->private;
+	struct thread_data *src = sw->priv;
 	struct thread_data *dst = sw->wq->td;
 
 	if (td_read(src))
@@ -224,7 +295,7 @@ static void io_workqueue_update_acct_fn(struct submit_worker *sw)
 
 }
 
-struct workqueue_ops rated_wq_ops = {
+static struct workqueue_ops rated_wq_ops = {
 	.fn			= io_workqueue_fn,
 	.pre_sleep_flush_fn	= io_workqueue_pre_sleep_flush_fn,
 	.pre_sleep_fn		= io_workqueue_pre_sleep_fn,
@@ -234,3 +305,19 @@ struct workqueue_ops rated_wq_ops = {
 	.init_worker_fn		= io_workqueue_init_worker_fn,
 	.exit_worker_fn		= io_workqueue_exit_worker_fn,
 };
+
+int rate_submit_init(struct thread_data *td, struct sk_out *sk_out)
+{
+	if (td->o.io_submit_mode != IO_MODE_OFFLOAD)
+		return 0;
+
+	return workqueue_init(td, &td->io_wq, &rated_wq_ops, td->o.iodepth, sk_out);
+}
+
+void rate_submit_exit(struct thread_data *td)
+{
+	if (td->o.io_submit_mode != IO_MODE_OFFLOAD)
+		return;
+
+	workqueue_exit(&td->io_wq);
+}
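
Usage sketch: the rate_submit_init()/rate_submit_exit() helpers added at the end of the patch only create the offload workqueue when io_submit_mode=offload, and check_overlap() is only reached when serialize_overlap is also set on an offloaded job. A minimal job file along the following lines should drive both paths; the filename, size and rate values are illustrative assumptions, not anything taken from the patch:

	[global]
	ioengine=libaio
	direct=1
	rw=randwrite
	bs=4k
	iodepth=8
	size=256m
	# hand submissions to the rated workqueue set up by rate_submit_init()
	io_submit_mode=offload
	# each worker calls check_overlap() before queuing an io_u
	serialize_overlap=1
	# example rate cap; offload mode exists to keep such caps accurate
	rate_iops=1000

	[overlap-job]
	filename=/tmp/fio-offload.dat
	numjobs=2

With two clones of the job targeting the same file, the per-worker scan over all thread_data instances in check_overlap() is what prevents overlapping writes from being in flight at the same time.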