From 751548451d969148529b642c05e7544726ec3b64 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 1 Jun 2006 13:56:09 +0200 Subject: [PATCH] [PATCH] Introduce an extra runstate for monitoring thread startup This way we can detect whether a thread is hanging in init, and take it down after a timeout. This would have caught the /dev/random problems fixed yesterday. Also limit the Client message printed at the beginning, to only show the first and last of identical job types instead of everything in between. --- fio-ini.c | 13 +++++++---- fio.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 70 insertions(+), 11 deletions(-) diff --git a/fio-ini.c b/fio-ini.c index 6f866dfb..f5297576 100644 --- a/fio-ini.c +++ b/fio-ini.c @@ -165,7 +165,7 @@ static void put_job(struct thread_data *td) thread_number--; } -static int add_job(struct thread_data *td, const char *jobname) +static int add_job(struct thread_data *td, const char *jobname, int job_add_num) { char *ddir_str[] = { "read", "write", "randread", "randwrite", "rw", NULL, "randrw" }; @@ -249,7 +249,11 @@ static int add_job(struct thread_data *td, const char *jobname) setup_log(&td->bw_log); ddir = td->ddir + (!td->sequential << 1) + (td->iomix << 2); - printf("Client%d (g=%d): rw=%s, odir=%d, bs=%d-%d, rate=%d, ioengine=%s, iodepth=%d\n", td->thread_number, td->groupid, ddir_str[ddir], td->odirect, td->min_bs, td->max_bs, td->rate, td->io_engine_name, td->iodepth); + + if (!job_add_num) + printf("Client%d (g=%d): rw=%s, odir=%d, bs=%d-%d, rate=%d, ioengine=%s, iodepth=%d\n", td->thread_number, td->groupid, ddir_str[ddir], td->odirect, td->min_bs, td->max_bs, td->rate, td->io_engine_name, td->iodepth); + else if (job_add_num == 1) + printf("...\n"); /* * recurse add identical jobs, clear numjobs and stonewall options @@ -265,8 +269,9 @@ static int add_job(struct thread_data *td, const char *jobname) td_new->numjobs = 1; td_new->stonewall = 0; td_new->jobnum = numjobs; + job_add_num = 
numjobs - 1; - if (add_job(td_new, jobname)) + if (add_job(td_new, jobname, job_add_num)) goto err; } return 0; @@ -863,7 +868,7 @@ int parse_jobs_ini(char *file) } fsetpos(f, &off); - if (add_job(td, name)) + if (add_job(td, name, 0)) return 1; } diff --git a/fio.c b/fio.c index df284581..bedad68f 100644 --- a/fio.c +++ b/fio.c @@ -65,6 +65,7 @@ extern unsigned long long mlock_size; enum { TD_NOT_CREATED = 0, TD_CREATED, + TD_INITIALIZED, TD_RUNNING, TD_VERIFYING, TD_EXITED, @@ -76,6 +77,7 @@ enum { static sem_t startup_sem; #define TERMINATE_ALL (-1) +#define JOB_START_TIMEOUT (5 * 1000) static void terminate_threads(int group_id) { @@ -1829,15 +1831,16 @@ static void *thread_main(void *data) } } + if (init_random_state(td)) + goto err; + + td_set_runstate(td, TD_INITIALIZED); sem_post(&startup_sem); sem_wait(&td->mutex); if (!td->create_serialize && setup_file(td)) goto err; - if (init_random_state(td)) - goto err; - gettimeofday(&td->epoch, NULL); while (td->loops--) { @@ -2035,6 +2038,9 @@ static void check_str_update(struct thread_data *td) case TD_CREATED: c = 'C'; break; + case TD_INITIALIZED: + c = 'I'; + break; case TD_NOT_CREATED: c = 'P'; break; @@ -2106,7 +2112,8 @@ static int thread_eta(struct thread_data *td, unsigned long elapsed) if (td->timeout && eta_sec > (td->timeout - elapsed)) eta_sec = td->timeout - elapsed; - } else if (td->runstate == TD_NOT_CREATED || td->runstate == TD_CREATED) { + } else if (td->runstate == TD_NOT_CREATED || td->runstate == TD_CREATED + || td->runstate == TD_INITIALIZED) { int t_eta = 0, r_eta = 0; /* @@ -2317,6 +2324,10 @@ static void run_threads(void) gettimeofday(&genesis, NULL); while (todo) { + struct thread_data *map[MAX_JOBS]; + struct timeval this_start; + int this_jobs = 0, left; + /* * create threads (TD_NOT_CREATED -> TD_CREATED) */ @@ -2345,9 +2356,13 @@ static void run_threads(void) if (td->stonewall && (nr_started || nr_running)) break; + /* + * Set state to created. 
Thread will transition + * to TD_INITIALIZED when it's done setting up. + */ td_set_runstate(td, TD_CREATED); + map[this_jobs++] = td; sem_init(&startup_sem, 0, 1); - todo--; nr_started++; if (td->use_thread) { @@ -2366,12 +2381,50 @@ static void run_threads(void) } /* - * start created threads (TD_CREATED -> TD_RUNNING) + * Wait for the started threads to transition to + * TD_INITIALIZED. */ + printf("fio: Waiting for threads to initialize...\n"); + gettimeofday(&this_start, NULL); + left = this_jobs; + while (left) { + if (mtime_since_now(&this_start) > JOB_START_TIMEOUT) + break; + + usleep(100000); + + for (i = 0; i < this_jobs; i++) { + td = map[i]; + if (!td) + continue; + if (td->runstate == TD_INITIALIZED || + td->runstate >= TD_EXITED) { + map[i] = NULL; + left--; + continue; + } + } + } + + if (left) { + fprintf(stderr, "fio: %d jobs failed to start\n", left); + for (i = 0; i < this_jobs; i++) { + td = map[i]; + if (!td) + continue; + kill(td->pid, SIGTERM); + } + break; + } + + /* + * start created threads (TD_INITIALIZED -> TD_RUNNING) + */ + printf("fio: Go for launch\n"); for (i = 0; i < thread_number; i++) { td = &threads[i]; - if (td->runstate != TD_CREATED) + if (td->runstate != TD_INITIALIZED) continue; td_set_runstate(td, TD_RUNNING); @@ -2379,6 +2432,7 @@ static void run_threads(void) nr_started--; m_rate += td->ratemin; t_rate += td->rate; + todo--; sem_post(&td->mutex); } -- 2.25.1