[fio.git] / idletime.c

#include <math.h>
#include "json.h"
#include "idletime.h"

static volatile struct idle_prof_common ipc;

/*
 * Get the time to complete a unit of work on a particular cpu.
 * The minimum over CALIBRATE_RUNS runs is returned.
 */
static double calibrate_unit(unsigned char *data)
{
	unsigned long t, i, j, k;
	struct timespec tps;
	double tunit = 0.0;

	for (i = 0; i < CALIBRATE_RUNS; i++) {

		fio_gettime(&tps, NULL);
		/* scale for less variance */
		for (j = 0; j < CALIBRATE_SCALE; j++) {
			/* unit of work */
			for (k = 0; k < page_size; k++) {
				data[(k + j) % page_size] = k % 256;
				/*
				 * We won't see STOP here; the check is only
				 * there to match the same statement in the
				 * profiling loop.
				 */
				if (ipc.status == IDLE_PROF_STATUS_PROF_STOP)
					return 0.0;
			}
		}

		t = utime_since_now(&tps);
		if (!t)
			continue;

		/* get the minimum time to complete CALIBRATE_SCALE units */
		if ((i == 0) || ((double)t < tunit))
			tunit = (double)t;
	}

	return tunit / CALIBRATE_SCALE;
}

static void free_cpu_affinity(struct idle_prof_thread *ipt)
{
#if defined(FIO_HAVE_CPU_AFFINITY)
	fio_cpuset_exit(&ipt->cpu_mask);
#endif
}

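/*
 * Pin the calling profiling thread to ipt->cpu. Returns 0 on success
 * and -1 if CPU affinity is unsupported or the mask cannot be applied.
 */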
static int set_cpu_affinity(struct idle_prof_thread *ipt)
{
#if defined(FIO_HAVE_CPU_AFFINITY)
	if (fio_cpuset_init(&ipt->cpu_mask)) {
		log_err("fio: cpuset init failed\n");
		return -1;
	}

	fio_cpu_set(&ipt->cpu_mask, ipt->cpu);

	if (fio_setaffinity(gettid(), ipt->cpu_mask)) {
		log_err("fio: fio_setaffinity failed\n");
		fio_cpuset_exit(&ipt->cpu_mask);
		return -1;
	}

	return 0;
#else
	log_err("fio: fio_setaffinity not supported\n");
	return -1;
#endif
}

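/*
 * Per-cpu profiling thread: calibrate the unit of work, switch to the
 * IDLE scheduling class, then keep repeating the unit of work until the
 * main thread asks it to stop. The completed loop count is later turned
 * into an idleness percentage.
 */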
static void *idle_prof_thread_fn(void *data)
{
	int retval;
	unsigned long j, k;
	struct idle_prof_thread *ipt = data;

	/* wait until all threads are spawned */
	pthread_mutex_lock(&ipt->init_lock);

	/* exit if any other thread failed to start */
	if (ipc.status == IDLE_PROF_STATUS_ABORT) {
		pthread_mutex_unlock(&ipt->init_lock);
		return NULL;
	}

	retval = set_cpu_affinity(ipt);
	if (retval == -1) {
		ipt->state = TD_EXITED;
		pthread_mutex_unlock(&ipt->init_lock);
		return NULL;
	}

	ipt->cali_time = calibrate_unit(ipt->data);

	/* delay setting the IDLE class until now for better calibration accuracy */
#if defined(CONFIG_SCHED_IDLE)
	if ((retval = fio_set_sched_idle()))
		log_err("fio: fio_set_sched_idle failed\n");
#else
	retval = -1;
	log_err("fio: fio_set_sched_idle not supported\n");
#endif
	if (retval == -1) {
		ipt->state = TD_EXITED;
		pthread_mutex_unlock(&ipt->init_lock);
		goto do_exit;
	}

	ipt->state = TD_INITIALIZED;

	/* signal the main thread that calibration is done */
	pthread_cond_signal(&ipt->cond);
	pthread_mutex_unlock(&ipt->init_lock);

	/* wait for the other threads to finish calibration */
	pthread_mutex_lock(&ipt->start_lock);

	/* exit if other threads failed to initialize */
	if (ipc.status == IDLE_PROF_STATUS_ABORT) {
		pthread_mutex_unlock(&ipt->start_lock);
		goto do_exit;
	}

	/* exit if we are doing calibration only */
	if (ipc.status == IDLE_PROF_STATUS_CALI_STOP) {
		pthread_mutex_unlock(&ipt->start_lock);
		goto do_exit;
	}

	fio_gettime(&ipt->tps, NULL);
	ipt->state = TD_RUNNING;

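	/*
	 * Profiling loop: keep repeating the calibrated unit of work while
	 * running at IDLE priority, so this thread only makes progress when
	 * the cpu has nothing else to do. The number of completed loops is
	 * later combined with the calibrated unit time to compute idleness.
	 */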
	j = 0;
	while (1) {
		for (k = 0; k < page_size; k++) {
			ipt->data[(k + j) % page_size] = k % 256;
			if (ipc.status == IDLE_PROF_STATUS_PROF_STOP) {
				fio_gettime(&ipt->tpe, NULL);
				goto idle_prof_done;
			}
		}
		j++;
	}

idle_prof_done:

	ipt->loops = j + (double) k / page_size;
	ipt->state = TD_EXITED;
	pthread_mutex_unlock(&ipt->start_lock);

do_exit:
	free_cpu_affinity(ipt);
	return NULL;
}

/*
 * Calculate the mean and standard deviation of the time to complete a
 * unit of work, across all cpus.
 */
static void calibration_stats(void)
{
	int i;
	double sum = 0.0, var = 0.0;
	struct idle_prof_thread *ipt;

	for (i = 0; i < ipc.nr_cpus; i++) {
		ipt = &ipc.ipts[i];
		sum += ipt->cali_time;
	}

	ipc.cali_mean = sum / ipc.nr_cpus;

	for (i = 0; i < ipc.nr_cpus; i++) {
		ipt = &ipc.ipts[i];
		var += pow(ipt->cali_time - ipc.cali_mean, 2);
	}

	ipc.cali_stddev = sqrt(var / (ipc.nr_cpus - 1));
}

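/*
 * Spawn one profiling thread per online cpu, pinned to that cpu, and
 * wait for each of them to finish calibration. Any single failure
 * aborts profiling, since the result would be skewed with a cpu missing.
 */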
void fio_idle_prof_init(void)
{
	int i, ret;
	struct timespec ts;
	pthread_attr_t tattr;
	struct idle_prof_thread *ipt;

	ipc.nr_cpus = cpus_online();
	ipc.status = IDLE_PROF_STATUS_OK;

	if (ipc.opt == IDLE_PROF_OPT_NONE)
		return;

	if ((ret = pthread_attr_init(&tattr))) {
		log_err("fio: pthread_attr_init %s\n", strerror(ret));
		return;
	}
	if ((ret = pthread_attr_setscope(&tattr, PTHREAD_SCOPE_SYSTEM))) {
		log_err("fio: pthread_attr_setscope %s\n", strerror(ret));
		return;
	}

	ipc.ipts = malloc(ipc.nr_cpus * sizeof(struct idle_prof_thread));
	if (!ipc.ipts) {
		log_err("fio: malloc failed\n");
		return;
	}

	ipc.buf = malloc(ipc.nr_cpus * page_size);
	if (!ipc.buf) {
		log_err("fio: malloc failed\n");
		free(ipc.ipts);
		return;
	}

	/*
	 * profiling aborts on any single thread failure since the
	 * result won't be accurate if any cpu is not used.
	 */
	for (i = 0; i < ipc.nr_cpus; i++) {
		ipt = &ipc.ipts[i];

		ipt->cpu = i;
		ipt->state = TD_NOT_CREATED;
		ipt->data = (unsigned char *)(ipc.buf + page_size * i);

		if ((ret = pthread_mutex_init(&ipt->init_lock, NULL))) {
			ipc.status = IDLE_PROF_STATUS_ABORT;
			log_err("fio: pthread_mutex_init %s\n", strerror(ret));
			break;
		}

		if ((ret = pthread_mutex_init(&ipt->start_lock, NULL))) {
			ipc.status = IDLE_PROF_STATUS_ABORT;
			log_err("fio: pthread_mutex_init %s\n", strerror(ret));
			break;
		}

		if ((ret = pthread_cond_init(&ipt->cond, NULL))) {
			ipc.status = IDLE_PROF_STATUS_ABORT;
			log_err("fio: pthread_cond_init %s\n", strerror(ret));
			break;
		}

		/* make sure all threads are spawned before they start */
		pthread_mutex_lock(&ipt->init_lock);

		/* make sure all threads finish init before profiling starts */
		pthread_mutex_lock(&ipt->start_lock);

		if ((ret = pthread_create(&ipt->thread, &tattr, idle_prof_thread_fn, ipt))) {
			ipc.status = IDLE_PROF_STATUS_ABORT;
			log_err("fio: pthread_create %s\n", strerror(ret));
			break;
		} else
			ipt->state = TD_CREATED;

		if ((ret = pthread_detach(ipt->thread))) {
			/* log error and let the thread spin */
			log_err("fio: pthread_detach %s\n", strerror(ret));
		}
	}

	/*
	 * let the good threads continue so that they can exit if errors
	 * occurred on other threads previously.
	 */
	for (i = 0; i < ipc.nr_cpus; i++) {
		ipt = &ipc.ipts[i];
		pthread_mutex_unlock(&ipt->init_lock);
	}

	if (ipc.status == IDLE_PROF_STATUS_ABORT)
		return;

	/* wait for calibration to finish */
	for (i = 0; i < ipc.nr_cpus; i++) {
		ipt = &ipc.ipts[i];
		pthread_mutex_lock(&ipt->init_lock);
		while ((ipt->state != TD_EXITED) &&
		       (ipt->state != TD_INITIALIZED)) {
			fio_gettime(&ts, NULL);
			ts.tv_sec += 1;
			pthread_cond_timedwait(&ipt->cond, &ipt->init_lock, &ts);
		}
		pthread_mutex_unlock(&ipt->init_lock);

		/*
		 * any thread that failed to initialize will abort the other
		 * threads later, after fio_idle_prof_start.
		 */
		if (ipt->state == TD_EXITED)
			ipc.status = IDLE_PROF_STATUS_ABORT;
	}

	if (ipc.status != IDLE_PROF_STATUS_ABORT)
		calibration_stats();
	else
		ipc.cali_mean = ipc.cali_stddev = 0.0;

	if (ipc.opt == IDLE_PROF_OPT_CALI)
		ipc.status = IDLE_PROF_STATUS_CALI_STOP;
}

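/*
 * Release each thread's start lock so that all profiling threads begin
 * (or, on abort/calibration-only, simply exit) at roughly the same time.
 */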
void fio_idle_prof_start(void)
{
	int i;
	struct idle_prof_thread *ipt;

	if (ipc.opt == IDLE_PROF_OPT_NONE)
		return;

	/* unlock regardless of whether abort is set or not */
	for (i = 0; i < ipc.nr_cpus; i++) {
		ipt = &ipc.ipts[i];
		pthread_mutex_unlock(&ipt->start_lock);
	}
}

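/*
 * Signal all profiling threads to stop, wait for them to exit, and
 * convert each thread's loop count into an idleness figure.
 */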
void fio_idle_prof_stop(void)
{
	int i;
	uint64_t runt;
	struct timespec ts;
	struct idle_prof_thread *ipt;

	if (ipc.opt == IDLE_PROF_OPT_NONE)
		return;

	if (ipc.opt == IDLE_PROF_OPT_CALI)
		return;

	ipc.status = IDLE_PROF_STATUS_PROF_STOP;

	/* wait for all threads to exit from profiling */
	for (i = 0; i < ipc.nr_cpus; i++) {
		ipt = &ipc.ipts[i];
		pthread_mutex_lock(&ipt->start_lock);
		while ((ipt->state != TD_EXITED) &&
		       (ipt->state != TD_NOT_CREATED)) {
			fio_gettime(&ts, NULL);
			ts.tv_sec += 1;
			/* timed wait in case a signal is not received */
			pthread_cond_timedwait(&ipt->cond, &ipt->start_lock, &ts);
		}
		pthread_mutex_unlock(&ipt->start_lock);

		/* calculate idleness: time spent on unit work over total runtime */
		if (ipc.cali_mean != 0.0) {
			runt = utime_since(&ipt->tps, &ipt->tpe);
			if (runt)
				ipt->idleness = ipt->loops * ipc.cali_mean / runt;
			else
				ipt->idleness = 0.0;
		} else
			ipt->idleness = 0.0;
	}

	/*
	 * memory allocations are freed via an explicit fio_idle_prof_cleanup
	 * call after the profiling stats have been collected by apps.
	 */
}

/*
 * Return the system-wide idle percentage when cpu is -1;
 * return the idle percentage of a single cpu otherwise.
 */
static double fio_idle_prof_cpu_stat(int cpu)
{
	int i, nr_cpus = ipc.nr_cpus;
	struct idle_prof_thread *ipt;
	double p = 0.0;

	if (ipc.opt == IDLE_PROF_OPT_NONE)
		return 0.0;

	if ((cpu >= nr_cpus) || (cpu < -1)) {
		log_err("fio: idle profiling invalid cpu index\n");
		return 0.0;
	}

	if (cpu == -1) {
		for (i = 0; i < nr_cpus; i++) {
			ipt = &ipc.ipts[i];
			p += ipt->idleness;
		}
		p /= nr_cpus;
	} else {
		ipt = &ipc.ipts[cpu];
		p = ipt->idleness;
	}

	return p * 100.0;
}

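/* free the per-thread bookkeeping and the shared work buffer */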
static void fio_idle_prof_cleanup(void)
{
	if (ipc.ipts) {
		free(ipc.ipts);
		ipc.ipts = NULL;
	}

	if (ipc.buf) {
		free(ipc.buf);
		ipc.buf = NULL;
	}
}

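/*
 * Parse the idle-prof command line option ("calibrate", "system" or
 * "percpu"). Returns 1 after a stand-alone calibration run, 0 when
 * profiling is enabled, and -1 on error.
 */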
int fio_idle_prof_parse_opt(const char *args)
{
	ipc.opt = IDLE_PROF_OPT_NONE; /* default */

	if (!args) {
		log_err("fio: empty idle-prof option string\n");
		return -1;
	}

#if defined(FIO_HAVE_CPU_AFFINITY) && defined(CONFIG_SCHED_IDLE)
	if (strcmp("calibrate", args) == 0) {
		ipc.opt = IDLE_PROF_OPT_CALI;
		fio_idle_prof_init();
		fio_idle_prof_start();
		fio_idle_prof_stop();
		show_idle_prof_stats(FIO_OUTPUT_NORMAL, NULL, NULL);
		return 1;
	} else if (strcmp("system", args) == 0) {
		ipc.opt = IDLE_PROF_OPT_SYSTEM;
		return 0;
	} else if (strcmp("percpu", args) == 0) {
		ipc.opt = IDLE_PROF_OPT_PERCPU;
		return 0;
	} else {
		log_err("fio: incorrect idle-prof option: %s\n", args);
		return -1;
	}
#else
	log_err("fio: idle-prof not supported on this platform\n");
	return -1;
#endif
}

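/*
 * Emit the idleness results, either as normal text via log_buf() or as
 * a "cpu_idleness" object attached to the given JSON parent, and free
 * the profiling allocations afterwards.
 */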
void show_idle_prof_stats(int output, struct json_object *parent,
			  struct buf_output *out)
{
	int i, nr_cpus = ipc.nr_cpus;
	struct json_object *tmp;
	char s[MAX_CPU_STR_LEN];

	if (output == FIO_OUTPUT_NORMAL) {
		if (ipc.opt > IDLE_PROF_OPT_CALI)
			log_buf(out, "\nCPU idleness:\n");
		else if (ipc.opt == IDLE_PROF_OPT_CALI)
			log_buf(out, "CPU idleness:\n");

		if (ipc.opt >= IDLE_PROF_OPT_SYSTEM)
			log_buf(out, " system: %3.2f%%\n", fio_idle_prof_cpu_stat(-1));

		if (ipc.opt == IDLE_PROF_OPT_PERCPU) {
			log_buf(out, " percpu: %3.2f%%", fio_idle_prof_cpu_stat(0));
			for (i = 1; i < nr_cpus; i++)
				log_buf(out, ", %3.2f%%", fio_idle_prof_cpu_stat(i));
			log_buf(out, "\n");
		}

		if (ipc.opt >= IDLE_PROF_OPT_CALI) {
			log_buf(out, " unit work: mean=%3.2fus,", ipc.cali_mean);
			log_buf(out, " stddev=%3.2f\n", ipc.cali_stddev);
		}

		/* dynamic memory allocations can now be freed */
		if (ipc.opt != IDLE_PROF_OPT_NONE)
			fio_idle_prof_cleanup();

		return;
	}

	if ((ipc.opt != IDLE_PROF_OPT_NONE) && (output & FIO_OUTPUT_JSON)) {
		if (!parent)
			return;

		tmp = json_create_object();
		if (!tmp)
			return;

		json_object_add_value_object(parent, "cpu_idleness", tmp);
		json_object_add_value_float(tmp, "system", fio_idle_prof_cpu_stat(-1));

		if (ipc.opt == IDLE_PROF_OPT_PERCPU) {
			for (i = 0; i < nr_cpus; i++) {
				snprintf(s, MAX_CPU_STR_LEN, "cpu-%d", i);
				json_object_add_value_float(tmp, s, fio_idle_prof_cpu_stat(i));
			}
		}

		json_object_add_value_float(tmp, "unit_mean", ipc.cali_mean);
		json_object_add_value_float(tmp, "unit_stddev", ipc.cali_stddev);

		fio_idle_prof_cleanup();
	}
}