X-Git-Url: https://git.kernel.dk/?p=fio.git;a=blobdiff_plain;f=fio.c;h=6e78949b52fd7771b94d31bc0b14c0575e18c84c;hp=e8a86585d4bd5d4ee83cff9202eca1ef8a3a58a9;hb=9978463287982c2f73f171ec9a79979939dee47a;hpb=5bf13a5a5f01986c5b87db26f0cadf95af133188

diff --git a/fio.c b/fio.c
index e8a86585..6e78949b 100644
--- a/fio.c
+++ b/fio.c
@@ -27,6 +27,7 @@
 #include <signal.h>
 #include <time.h>
 #include <locale.h>
+#include <assert.h>
 #include <sys/stat.h>
 #include <sys/wait.h>
 #include <sys/ipc.h>
@@ -162,20 +163,14 @@ static struct fio_file *get_next_file(struct thread_data *td)
  */
 static void cleanup_pending_aio(struct thread_data *td)
 {
-	struct timespec ts = { .tv_sec = 0, .tv_nsec = 0};
 	struct list_head *entry, *n;
-	struct io_completion_data icd;
 	struct io_u *io_u;
 	int r;
 
 	/*
 	 * get immediately available events, if any
 	 */
-	r = td_io_getevents(td, 0, td->cur_depth, &ts);
-	if (r > 0) {
-		icd.nr = r;
-		ios_completed(td, &icd);
-	}
+	io_u_queued_complete(td, 0, NULL);
 
 	/*
 	 * now cancel remaining active events
@@ -190,13 +185,8 @@ static void cleanup_pending_aio(struct thread_data *td)
 		}
 	}
 
-	if (td->cur_depth) {
-		r = td_io_getevents(td, td->cur_depth, td->cur_depth, NULL);
-		if (r > 0) {
-			icd.nr = r;
-			ios_completed(td, &icd);
-		}
-	}
+	if (td->cur_depth)
+		io_u_queued_complete(td, td->cur_depth, NULL);
 }
 
 /*
@@ -206,7 +196,6 @@ static void cleanup_pending_aio(struct thread_data *td)
 static int fio_io_sync(struct thread_data *td, struct fio_file *f)
 {
 	struct io_u *io_u = __get_io_u(td);
-	struct io_completion_data icd;
 	int ret;
 
 	if (!io_u)
@@ -221,23 +210,20 @@ static int fio_io_sync(struct thread_data *td, struct fio_file *f)
 	}
 
 	ret = td_io_queue(td, io_u);
-	if (ret) {
+	if (ret < 0) {
 		td_verror(td, io_u->error);
 		put_io_u(td, io_u);
 		return 1;
-	}
-
-	ret = td_io_getevents(td, 1, td->cur_depth, NULL);
-	if (ret < 0) {
-		td_verror(td, ret);
-		return 1;
-	}
+	} else if (ret == FIO_Q_QUEUED) {
+		if (io_u_queued_complete(td, 1, NULL))
+			return 1;
+	} else if (ret == FIO_Q_COMPLETED) {
+		if (io_u->error) {
+			td_verror(td, io_u->error);
+			return 1;
+		}
 
-	icd.nr = ret;
-	ios_completed(td, &icd);
-	if (icd.error) {
-		td_verror(td, icd.error);
-		return 1;
+		io_u_sync_complete(td, io_u, NULL);
 	}
 
 	return 0;
@@ -249,10 +235,9 @@ static int fio_io_sync(struct thread_data *td, struct fio_file *f)
  */
 static void do_verify(struct thread_data *td)
 {
-	struct io_u *io_u, *v_io_u = NULL;
-	struct io_completion_data icd;
 	struct fio_file *f;
-	int ret, i;
+	struct io_u *io_u;
+	int ret, i, min_events;
 
 	/*
 	 * sync io first and invalidate cache, to make sure we really
@@ -265,78 +250,68 @@ static void do_verify(struct thread_data *td)
 
 	td_set_runstate(td, TD_VERIFYING);
 
-	do {
-		if (td->terminate)
-			break;
-
+	io_u = NULL;
+	while (!td->terminate) {
 		io_u = __get_io_u(td);
 		if (!io_u)
 			break;
 
-		if (runtime_exceeded(td, &io_u->start_time)) {
-			put_io_u(td, io_u);
+		if (runtime_exceeded(td, &io_u->start_time))
 			break;
-		}
 
-		if (get_next_verify(td, io_u)) {
-			put_io_u(td, io_u);
+		if (get_next_verify(td, io_u))
 			break;
-		}
 
-		f = get_next_file(td);
-		if (!f)
+		if (td_io_prep(td, io_u))
 			break;
 
-		io_u->file = f;
+requeue:
+		ret = td_io_queue(td, io_u);
 
-		if (td_io_prep(td, io_u)) {
-			put_io_u(td, io_u);
+		switch (ret) {
+		case FIO_Q_COMPLETED:
+			if (io_u->error)
+				ret = -io_u->error;
+			if (io_u->xfer_buflen != io_u->resid && io_u->resid) {
+				int bytes = io_u->xfer_buflen - io_u->resid;
+
+				io_u->xfer_buflen = io_u->resid;
+				io_u->xfer_buf += bytes;
+				goto requeue;
+			}
+			ret = io_u_sync_complete(td, io_u, verify_io_u);
+			if (ret)
+				break;
+			continue;
+		case FIO_Q_QUEUED:
+			break;
+		default:
+			assert(ret < 0);
+			td_verror(td, -ret);
 			break;
 		}
 
-		ret = td_io_queue(td, io_u);
-		if (ret) {
-			td_verror(td, io_u->error);
-			put_io_u(td, io_u);
+		if (ret < 0 || td->error)
 			break;
-		}
 
 		/*
-		 * we have one pending to verify, do that while
-		 * we are doing io on the next one
+		 * if we can queue more, do so. but check if there are
+		 * completed io_u's first.
 		 */
-		if (do_io_u_verify(td, &v_io_u))
-			break;
-
-		ret = td_io_getevents(td, 1, 1, NULL);
-		if (ret != 1) {
-			if (ret < 0)
-				td_verror(td, ret);
-			break;
-		}
-
-		v_io_u = td->io_ops->event(td, 0);
-		icd.nr = 1;
-		icd.error = 0;
-		fio_gettime(&icd.time, NULL);
-		io_completed(td, v_io_u, &icd);
-
-		if (icd.error) {
-			td_verror(td, icd.error);
-			put_io_u(td, v_io_u);
-			v_io_u = NULL;
-			break;
-		}
+		min_events = 0;
+		if (queue_full(td))
+			min_events = 1;
 
 		/*
-		 * if we can't submit more io, we need to verify now
+		 * Reap required number of io units, if any, and do the
+		 * verification on them through the callback handler
 		 */
-		if (queue_full(td) && do_io_u_verify(td, &v_io_u))
+		if (io_u_queued_complete(td, min_events, verify_io_u))
 			break;
+	}
 
-	} while (1);
-
-	do_io_u_verify(td, &v_io_u);
+	if (io_u)
+		put_io_u(td, io_u);
 
 	if (td->cur_depth)
 		cleanup_pending_aio(td);
@@ -375,7 +350,6 @@ static void do_cpuio(struct thread_data *td)
  */
 static void do_io(struct thread_data *td)
 {
-	struct io_completion_data icd;
 	struct timeval s;
 	unsigned long usec;
 	struct fio_file *f;
@@ -384,7 +358,8 @@ static void do_io(struct thread_data *td)
 	td_set_runstate(td, TD_RUNNING);
 
 	while ((td->this_io_bytes[0] + td->this_io_bytes[1]) < td->io_size) {
-		struct timespec *timeout;
+		struct timeval comp_time;
+		long bytes_done = 0;
 		int min_evts = 0;
 		struct io_u *io_u;
 
@@ -400,74 +375,92 @@ static void do_io(struct thread_data *td)
 			break;
 
 		memcpy(&s, &io_u->start_time, sizeof(s));
+
+		if (runtime_exceeded(td, &s)) {
+			put_io_u(td, io_u);
+			break;
+		}
 requeue:
 		ret = td_io_queue(td, io_u);
-		if (ret) {
-			if (ret > 0 && (io_u->xfer_buflen != io_u->resid) &&
-			    io_u->resid) {
-				/*
-				 * short read/write. requeue.
-				 */
+
+		switch (ret) {
+		case FIO_Q_COMPLETED:
+			if (io_u->error) {
+				ret = -io_u->error;
+				break;
+			}
+			if (io_u->xfer_buflen != io_u->resid && io_u->resid) {
+				int bytes = io_u->xfer_buflen - io_u->resid;
+
 				io_u->xfer_buflen = io_u->resid;
-				io_u->xfer_buf += ret;
+				io_u->xfer_buf += bytes;
 				goto requeue;
-			} else {
-				put_io_u(td, io_u);
-				break;
 			}
+			fio_gettime(&comp_time, NULL);
+			bytes_done = io_u_sync_complete(td, io_u, NULL);
+			if (bytes_done < 0)
+				ret = bytes_done;
+			break;
+		case FIO_Q_QUEUED:
+			break;
+		default:
+			assert(ret < 0);
+			put_io_u(td, io_u);
+			break;
 		}
 
-		add_slat_sample(td, io_u->ddir, mtime_since(&io_u->start_time, &io_u->issue_time));
+		if (ret < 0 || td->error)
+			break;
 
-		if (td->cur_depth < td->iodepth) {
-			struct timespec ts = { .tv_sec = 0, .tv_nsec = 0};
+		add_slat_sample(td, io_u->ddir, mtime_since(&io_u->start_time, &io_u->issue_time));
 
-			timeout = &ts;
+		/*
+		 * See if we need to complete some commands
+		 */
+		if (ret == FIO_Q_QUEUED) {
 			min_evts = 0;
-		} else {
-			timeout = NULL;
-			min_evts = 1;
+			if (queue_full(td))
+				min_evts = 1;
+
+			fio_gettime(&comp_time, NULL);
+			bytes_done = io_u_queued_complete(td, min_evts, NULL);
+			if (bytes_done < 0)
+				break;
 		}
 
-		ret = td_io_getevents(td, min_evts, td->cur_depth, timeout);
-		if (ret < 0) {
-			td_verror(td, ret);
-			break;
-		} else if (!ret)
+		if (!bytes_done)
 			continue;
 
-		icd.nr = ret;
-		ios_completed(td, &icd);
-		if (icd.error) {
-			td_verror(td, icd.error);
-			break;
-		}
-
 		/*
 		 * the rate is batched for now, it should work for batches
 		 * of completions except the very first one which may look
 		 * a little bursty
 		 */
-		usec = utime_since(&s, &icd.time);
+		usec = utime_since(&s, &comp_time);
 
-		rate_throttle(td, usec, icd.bytes_done[td->ddir], td->ddir);
+		rate_throttle(td, usec, bytes_done, td->ddir);
 
-		if (check_min_rate(td, &icd.time)) {
+		if (check_min_rate(td, &comp_time)) {
 			if (exitall_on_terminate)
 				terminate_threads(td->groupid, 0);
 			td_verror(td, ENODATA);
 			break;
 		}
 
-		if (runtime_exceeded(td, &icd.time))
-			break;
-
 		if (td->thinktime) {
 			unsigned long long b;
 
 			b = td->io_blocks[0] + td->io_blocks[1];
-			if (!(b % td->thinktime_blocks))
-				usec_sleep(td, td->thinktime);
+			if (!(b % td->thinktime_blocks)) {
+				int left;
+
+				if (td->thinktime_spin)
+					__usec_sleep(td->thinktime_spin);
+
+				left = td->thinktime - td->thinktime_spin;
+				if (left)
+					usec_sleep(td, left);
+			}
 		}
 	}
 
@@ -761,28 +754,30 @@ err:
 	close_ioengine(td);
 	cleanup_io_u(td);
 	td_set_runstate(td, TD_EXITED);
-	return NULL;
+	return (void *) (unsigned long) td->error;
 }
 
 /*
  * We cannot pass the td data into a forked process, so attach the td and
  * pass it to the thread worker.
  */
-static void *fork_main(int shmid, int offset)
+static int fork_main(int shmid, int offset)
 {
 	struct thread_data *td;
-	void *data;
+	void *data, *ret;
 
 	data = shmat(shmid, NULL, 0);
 	if (data == (void *) -1) {
+		int err = errno;
+
 		perror("shmat");
-		return NULL;
+		return err;
 	}
 
 	td = data + offset * sizeof(struct thread_data);
-	thread_main(td);
+	ret = thread_main(td);
 	shmdt(data);
-	return NULL;
+	return (int) (unsigned long) ret;
 }
 
 /*
@@ -791,7 +786,7 @@ static void *fork_main(int shmid, int offset)
 static void reap_threads(int *nr_running, int *t_rate, int *m_rate)
 {
 	struct thread_data *td;
-	int i, cputhreads, pending;
+	int i, cputhreads, pending, status, ret;
 
 	/*
 	 * reap exited threads (TD_EXITED -> TD_REAPED)
@@ -805,6 +800,22 @@ static void reap_threads(int *nr_running, int *t_rate, int *m_rate)
 		if (td->io_ops && td->io_ops->flags & FIO_CPUIO)
 			cputhreads++;
 
+		if (td->runstate < TD_EXITED) {
+			/*
+			 * check if someone quit or got killed in an unusual way
+			 */
+			ret = waitpid(td->pid, &status, WNOHANG);
+			if (ret < 0)
+				perror("waitpid");
+			else if ((ret == td->pid) && WIFSIGNALED(status)) {
+				int sig = WTERMSIG(status);
+
+				log_err("fio: pid=%d, got signal=%d\n", td->pid, sig);
+				td_set_runstate(td, TD_REAPED);
+				goto reaped;
+			}
+		}
+
 		if (td->runstate != TD_EXITED) {
 			if (td->runstate < TD_RUNNING)
 				pending++;
@@ -822,9 +833,19 @@ static void reap_threads(int *nr_running, int *t_rate, int *m_rate)
 
 			if (pthread_join(td->thread, (void *) &ret))
 				perror("thread_join");
-		} else
-			waitpid(td->pid, NULL, 0);
+		} else {
+			int status;
+
+			ret = waitpid(td->pid, &status, 0);
+			if (ret < 0)
+				perror("waitpid");
+			else if (WIFEXITED(status) && WEXITSTATUS(status)) {
+				if (!exit_value)
+					exit_value++;
+			}
+		}
 
+reaped:
 		(*nr_running)--;
 		(*m_rate) -= td->ratemin;
 		(*t_rate) -= td->rate;
@@ -932,8 +953,9 @@ static void run_threads(void)
 				if (fork())
 					fio_sem_down(&startup_sem);
 				else {
-					fork_main(shm_id, i);
-					exit(0);
+					int ret = fork_main(shm_id, i);
+
+					exit(ret);
 				}
 			}
 		}