windows: update EULA
[fio.git] / iolog.c
... / ...
CommitLineData
1/*
2 * Code related to writing an iolog of what a thread is doing, and to
3 * later read that back and replay
4 */
5#include <stdio.h>
6#include <stdlib.h>
7#include <assert.h>
8#include <sys/types.h>
9#include <sys/stat.h>
10#include <unistd.h>
11#ifdef CONFIG_ZLIB
12#include <zlib.h>
13#endif
14
15#include "flist.h"
16#include "fio.h"
17#include "trim.h"
18#include "filelock.h"
19#include "smalloc.h"
20#include "blktrace.h"
21#include "pshared.h"
22
23static int iolog_flush(struct io_log *log);
24
25static const char iolog_ver2[] = "fio version 2 iolog";
26
27void queue_io_piece(struct thread_data *td, struct io_piece *ipo)
28{
29 flist_add_tail(&ipo->list, &td->io_log_list);
30 td->total_io_size += ipo->len;
31}
32
33void log_io_u(const struct thread_data *td, const struct io_u *io_u)
34{
35 if (!td->o.write_iolog_file)
36 return;
37
38 fprintf(td->iolog_f, "%s %s %llu %lu\n", io_u->file->file_name,
39 io_ddir_name(io_u->ddir),
40 io_u->offset, io_u->buflen);
41}
42
43void log_file(struct thread_data *td, struct fio_file *f,
44 enum file_log_act what)
45{
46 const char *act[] = { "add", "open", "close" };
47
48 assert(what < 3);
49
50 if (!td->o.write_iolog_file)
51 return;
52
53
54 /*
55 * this happens on the pre-open/close done before the job starts
56 */
57 if (!td->iolog_f)
58 return;
59
60 fprintf(td->iolog_f, "%s %s\n", f->file_name, act[what]);
61}
62
63static void iolog_delay(struct thread_data *td, unsigned long delay)
64{
65 uint64_t usec = utime_since_now(&td->last_issue);
66 uint64_t this_delay;
67 struct timespec ts;
68
69 if (delay < td->time_offset) {
70 td->time_offset = 0;
71 return;
72 }
73
74 delay -= td->time_offset;
75 if (delay < usec)
76 return;
77
78 delay -= usec;
79
80 fio_gettime(&ts, NULL);
81 while (delay && !td->terminate) {
82 this_delay = delay;
83 if (this_delay > 500000)
84 this_delay = 500000;
85
86 usec_sleep(td, this_delay);
87 delay -= this_delay;
88 }
89
90 usec = utime_since_now(&ts);
91 if (usec > delay)
92 td->time_offset = usec - delay;
93 else
94 td->time_offset = 0;
95}
96
97static int ipo_special(struct thread_data *td, struct io_piece *ipo)
98{
99 struct fio_file *f;
100 int ret;
101
102 /*
103 * Not a special ipo
104 */
105 if (ipo->ddir != DDIR_INVAL)
106 return 0;
107
108 f = td->files[ipo->fileno];
109
110 switch (ipo->file_action) {
111 case FIO_LOG_OPEN_FILE:
112 if (td->o.replay_redirect && fio_file_open(f)) {
113 dprint(FD_FILE, "iolog: ignoring re-open of file %s\n",
114 f->file_name);
115 break;
116 }
117 ret = td_io_open_file(td, f);
118 if (!ret)
119 break;
120 td_verror(td, ret, "iolog open file");
121 return -1;
122 case FIO_LOG_CLOSE_FILE:
123 td_io_close_file(td, f);
124 break;
125 case FIO_LOG_UNLINK_FILE:
126 td_io_unlink_file(td, f);
127 break;
128 default:
129 log_err("fio: bad file action %d\n", ipo->file_action);
130 break;
131 }
132
133 return 1;
134}
135
136int read_iolog_get(struct thread_data *td, struct io_u *io_u)
137{
138 struct io_piece *ipo;
139 unsigned long elapsed;
140
141 while (!flist_empty(&td->io_log_list)) {
142 int ret;
143
144 ipo = flist_first_entry(&td->io_log_list, struct io_piece, list);
145 flist_del(&ipo->list);
146 remove_trim_entry(td, ipo);
147
148 ret = ipo_special(td, ipo);
149 if (ret < 0) {
150 free(ipo);
151 break;
152 } else if (ret > 0) {
153 free(ipo);
154 continue;
155 }
156
157 io_u->ddir = ipo->ddir;
158 if (ipo->ddir != DDIR_WAIT) {
159 io_u->offset = ipo->offset;
160 io_u->buflen = ipo->len;
161 io_u->file = td->files[ipo->fileno];
162 get_file(io_u->file);
163 dprint(FD_IO, "iolog: get %llu/%lu/%s\n", io_u->offset,
164 io_u->buflen, io_u->file->file_name);
165 if (ipo->delay)
166 iolog_delay(td, ipo->delay);
167 } else {
168 elapsed = mtime_since_genesis();
169 if (ipo->delay > elapsed)
170 usec_sleep(td, (ipo->delay - elapsed) * 1000);
171 }
172
173 free(ipo);
174
175 if (io_u->ddir != DDIR_WAIT)
176 return 0;
177 }
178
179 td->done = 1;
180 return 1;
181}
182
183void prune_io_piece_log(struct thread_data *td)
184{
185 struct io_piece *ipo;
186 struct fio_rb_node *n;
187
188 while ((n = rb_first(&td->io_hist_tree)) != NULL) {
189 ipo = rb_entry(n, struct io_piece, rb_node);
190 rb_erase(n, &td->io_hist_tree);
191 remove_trim_entry(td, ipo);
192 td->io_hist_len--;
193 free(ipo);
194 }
195
196 while (!flist_empty(&td->io_hist_list)) {
197 ipo = flist_first_entry(&td->io_hist_list, struct io_piece, list);
198 flist_del(&ipo->list);
199 remove_trim_entry(td, ipo);
200 td->io_hist_len--;
201 free(ipo);
202 }
203}
204
205/*
206 * log a successful write, so we can unwind the log for verify
207 */
208void log_io_piece(struct thread_data *td, struct io_u *io_u)
209{
210 struct fio_rb_node **p, *parent;
211 struct io_piece *ipo, *__ipo;
212
213 ipo = malloc(sizeof(struct io_piece));
214 init_ipo(ipo);
215 ipo->file = io_u->file;
216 ipo->offset = io_u->offset;
217 ipo->len = io_u->buflen;
218 ipo->numberio = io_u->numberio;
219 ipo->flags = IP_F_IN_FLIGHT;
220
221 io_u->ipo = ipo;
222
223 if (io_u_should_trim(td, io_u)) {
224 flist_add_tail(&ipo->trim_list, &td->trim_list);
225 td->trim_entries++;
226 }
227
228 /*
229 * We don't need to sort the entries if we only performed sequential
230 * writes. In this case, just reading back data in the order we wrote
231 * it out is the faster but still safe.
232 *
233 * One exception is if we don't have a random map in which case we need
234 * to check for duplicate blocks and drop the old one, which we rely on
235 * the rb insert/lookup for handling.
236 */
237 if (((!td->o.verifysort) || !td_random(td)) &&
238 file_randommap(td, ipo->file)) {
239 INIT_FLIST_HEAD(&ipo->list);
240 flist_add_tail(&ipo->list, &td->io_hist_list);
241 ipo->flags |= IP_F_ONLIST;
242 td->io_hist_len++;
243 return;
244 }
245
246 RB_CLEAR_NODE(&ipo->rb_node);
247
248 /*
249 * Sort the entry into the verification list
250 */
251restart:
252 p = &td->io_hist_tree.rb_node;
253 parent = NULL;
254 while (*p) {
255 int overlap = 0;
256 parent = *p;
257
258 __ipo = rb_entry(parent, struct io_piece, rb_node);
259 if (ipo->file < __ipo->file)
260 p = &(*p)->rb_left;
261 else if (ipo->file > __ipo->file)
262 p = &(*p)->rb_right;
263 else if (ipo->offset < __ipo->offset) {
264 p = &(*p)->rb_left;
265 overlap = ipo->offset + ipo->len > __ipo->offset;
266 }
267 else if (ipo->offset > __ipo->offset) {
268 p = &(*p)->rb_right;
269 overlap = __ipo->offset + __ipo->len > ipo->offset;
270 }
271 else
272 overlap = 1;
273
274 if (overlap) {
275 dprint(FD_IO, "iolog: overlap %llu/%lu, %llu/%lu\n",
276 __ipo->offset, __ipo->len,
277 ipo->offset, ipo->len);
278 td->io_hist_len--;
279 rb_erase(parent, &td->io_hist_tree);
280 remove_trim_entry(td, __ipo);
281 if (!(__ipo->flags & IP_F_IN_FLIGHT))
282 free(__ipo);
283 goto restart;
284 }
285 }
286
287 rb_link_node(&ipo->rb_node, parent, p);
288 rb_insert_color(&ipo->rb_node, &td->io_hist_tree);
289 ipo->flags |= IP_F_ONRB;
290 td->io_hist_len++;
291}
292
293void unlog_io_piece(struct thread_data *td, struct io_u *io_u)
294{
295 struct io_piece *ipo = io_u->ipo;
296
297 if (td->ts.nr_block_infos) {
298 uint32_t *info = io_u_block_info(td, io_u);
299 if (BLOCK_INFO_STATE(*info) < BLOCK_STATE_TRIM_FAILURE) {
300 if (io_u->ddir == DDIR_TRIM)
301 *info = BLOCK_INFO_SET_STATE(*info,
302 BLOCK_STATE_TRIM_FAILURE);
303 else if (io_u->ddir == DDIR_WRITE)
304 *info = BLOCK_INFO_SET_STATE(*info,
305 BLOCK_STATE_WRITE_FAILURE);
306 }
307 }
308
309 if (!ipo)
310 return;
311
312 if (ipo->flags & IP_F_ONRB)
313 rb_erase(&ipo->rb_node, &td->io_hist_tree);
314 else if (ipo->flags & IP_F_ONLIST)
315 flist_del(&ipo->list);
316
317 free(ipo);
318 io_u->ipo = NULL;
319 td->io_hist_len--;
320}
321
322void trim_io_piece(struct thread_data *td, const struct io_u *io_u)
323{
324 struct io_piece *ipo = io_u->ipo;
325
326 if (!ipo)
327 return;
328
329 ipo->len = io_u->xfer_buflen - io_u->resid;
330}
331
332void write_iolog_close(struct thread_data *td)
333{
334 fflush(td->iolog_f);
335 fclose(td->iolog_f);
336 free(td->iolog_buf);
337 td->iolog_f = NULL;
338 td->iolog_buf = NULL;
339}
340
341/*
342 * Read version 2 iolog data. It is enhanced to include per-file logging,
343 * syncs, etc.
344 */
345static int read_iolog2(struct thread_data *td, FILE *f)
346{
347 unsigned long long offset;
348 unsigned int bytes;
349 int reads, writes, waits, fileno = 0, file_action = 0; /* stupid gcc */
350 char *rfname, *fname, *act;
351 char *str, *p;
352 enum fio_ddir rw;
353
354 free_release_files(td);
355
356 /*
357 * Read in the read iolog and store it, reuse the infrastructure
358 * for doing verifications.
359 */
360 str = malloc(4096);
361 rfname = fname = malloc(256+16);
362 act = malloc(256+16);
363
364 reads = writes = waits = 0;
365 while ((p = fgets(str, 4096, f)) != NULL) {
366 struct io_piece *ipo;
367 int r;
368
369 r = sscanf(p, "%256s %256s %llu %u", rfname, act, &offset,
370 &bytes);
371
372 if (td->o.replay_redirect)
373 fname = td->o.replay_redirect;
374
375 if (r == 4) {
376 /*
377 * Check action first
378 */
379 if (!strcmp(act, "wait"))
380 rw = DDIR_WAIT;
381 else if (!strcmp(act, "read"))
382 rw = DDIR_READ;
383 else if (!strcmp(act, "write"))
384 rw = DDIR_WRITE;
385 else if (!strcmp(act, "sync"))
386 rw = DDIR_SYNC;
387 else if (!strcmp(act, "datasync"))
388 rw = DDIR_DATASYNC;
389 else if (!strcmp(act, "trim"))
390 rw = DDIR_TRIM;
391 else {
392 log_err("fio: bad iolog file action: %s\n",
393 act);
394 continue;
395 }
396 fileno = get_fileno(td, fname);
397 } else if (r == 2) {
398 rw = DDIR_INVAL;
399 if (!strcmp(act, "add")) {
400 if (td->o.replay_redirect &&
401 get_fileno(td, fname) != -1) {
402 dprint(FD_FILE, "iolog: ignoring"
403 " re-add of file %s\n", fname);
404 } else {
405 fileno = add_file(td, fname, 0, 1);
406 file_action = FIO_LOG_ADD_FILE;
407 }
408 continue;
409 } else if (!strcmp(act, "open")) {
410 fileno = get_fileno(td, fname);
411 file_action = FIO_LOG_OPEN_FILE;
412 } else if (!strcmp(act, "close")) {
413 fileno = get_fileno(td, fname);
414 file_action = FIO_LOG_CLOSE_FILE;
415 } else {
416 log_err("fio: bad iolog file action: %s\n",
417 act);
418 continue;
419 }
420 } else {
421 log_err("bad iolog2: %s\n", p);
422 continue;
423 }
424
425 if (rw == DDIR_READ)
426 reads++;
427 else if (rw == DDIR_WRITE) {
428 /*
429 * Don't add a write for ro mode
430 */
431 if (read_only)
432 continue;
433 writes++;
434 } else if (rw == DDIR_WAIT) {
435 if (td->o.no_stall)
436 continue;
437 waits++;
438 } else if (rw == DDIR_INVAL) {
439 } else if (!ddir_sync(rw)) {
440 log_err("bad ddir: %d\n", rw);
441 continue;
442 }
443
444 /*
445 * Make note of file
446 */
447 ipo = malloc(sizeof(*ipo));
448 init_ipo(ipo);
449 ipo->ddir = rw;
450 if (rw == DDIR_WAIT) {
451 ipo->delay = offset;
452 } else {
453 if (td->o.replay_scale)
454 ipo->offset = offset / td->o.replay_scale;
455 else
456 ipo->offset = offset;
457 ipo_bytes_align(td->o.replay_align, ipo);
458
459 ipo->len = bytes;
460 if (rw != DDIR_INVAL && bytes > td->o.max_bs[rw])
461 td->o.max_bs[rw] = bytes;
462 ipo->fileno = fileno;
463 ipo->file_action = file_action;
464 td->o.size += bytes;
465 }
466
467 queue_io_piece(td, ipo);
468 }
469
470 free(str);
471 free(act);
472 free(rfname);
473
474 if (writes && read_only) {
475 log_err("fio: <%s> skips replay of %d writes due to"
476 " read-only\n", td->o.name, writes);
477 writes = 0;
478 }
479
480 if (!reads && !writes && !waits)
481 return 1;
482 else if (reads && !writes)
483 td->o.td_ddir = TD_DDIR_READ;
484 else if (!reads && writes)
485 td->o.td_ddir = TD_DDIR_WRITE;
486 else
487 td->o.td_ddir = TD_DDIR_RW;
488
489 return 0;
490}
491
492/*
493 * open iolog, check version, and call appropriate parser
494 */
495static int init_iolog_read(struct thread_data *td)
496{
497 char buffer[256], *p;
498 FILE *f;
499 int ret;
500
501 f = fopen(td->o.read_iolog_file, "r");
502 if (!f) {
503 perror("fopen read iolog");
504 return 1;
505 }
506
507 p = fgets(buffer, sizeof(buffer), f);
508 if (!p) {
509 td_verror(td, errno, "iolog read");
510 log_err("fio: unable to read iolog\n");
511 fclose(f);
512 return 1;
513 }
514
515 /*
516 * version 2 of the iolog stores a specific string as the
517 * first line, check for that
518 */
519 if (!strncmp(iolog_ver2, buffer, strlen(iolog_ver2)))
520 ret = read_iolog2(td, f);
521 else {
522 log_err("fio: iolog version 1 is no longer supported\n");
523 ret = 1;
524 }
525
526 fclose(f);
527 return ret;
528}
529
530/*
531 * Set up a log for storing io patterns.
532 */
533static int init_iolog_write(struct thread_data *td)
534{
535 struct fio_file *ff;
536 FILE *f;
537 unsigned int i;
538
539 f = fopen(td->o.write_iolog_file, "a");
540 if (!f) {
541 perror("fopen write iolog");
542 return 1;
543 }
544
545 /*
546 * That's it for writing, setup a log buffer and we're done.
547 */
548 td->iolog_f = f;
549 td->iolog_buf = malloc(8192);
550 setvbuf(f, td->iolog_buf, _IOFBF, 8192);
551
552 /*
553 * write our version line
554 */
555 if (fprintf(f, "%s\n", iolog_ver2) < 0) {
556 perror("iolog init\n");
557 return 1;
558 }
559
560 /*
561 * add all known files
562 */
563 for_each_file(td, ff, i)
564 log_file(td, ff, FIO_LOG_ADD_FILE);
565
566 return 0;
567}
568
569int init_iolog(struct thread_data *td)
570{
571 int ret = 0;
572
573 if (td->o.read_iolog_file) {
574 int need_swap;
575
576 /*
577 * Check if it's a blktrace file and load that if possible.
578 * Otherwise assume it's a normal log file and load that.
579 */
580 if (is_blktrace(td->o.read_iolog_file, &need_swap))
581 ret = load_blktrace(td, td->o.read_iolog_file, need_swap);
582 else
583 ret = init_iolog_read(td);
584 } else if (td->o.write_iolog_file)
585 ret = init_iolog_write(td);
586
587 if (ret)
588 td_verror(td, EINVAL, "failed initializing iolog");
589
590 return ret;
591}
592
593void setup_log(struct io_log **log, struct log_params *p,
594 const char *filename)
595{
596 struct io_log *l;
597 int i;
598 struct io_u_plat_entry *entry;
599 struct flist_head *list;
600
601 l = scalloc(1, sizeof(*l));
602 INIT_FLIST_HEAD(&l->io_logs);
603 l->log_type = p->log_type;
604 l->log_offset = p->log_offset;
605 l->log_gz = p->log_gz;
606 l->log_gz_store = p->log_gz_store;
607 l->avg_msec = p->avg_msec;
608 l->hist_msec = p->hist_msec;
609 l->hist_coarseness = p->hist_coarseness;
610 l->filename = strdup(filename);
611 l->td = p->td;
612
613 /* Initialize histogram lists for each r/w direction,
614 * with initial io_u_plat of all zeros:
615 */
616 for (i = 0; i < DDIR_RWDIR_CNT; i++) {
617 list = &l->hist_window[i].list;
618 INIT_FLIST_HEAD(list);
619 entry = calloc(1, sizeof(struct io_u_plat_entry));
620 flist_add(&entry->list, list);
621 }
622
623 if (l->td && l->td->o.io_submit_mode != IO_MODE_OFFLOAD) {
624 struct io_logs *p;
625
626 p = calloc(1, sizeof(*l->pending));
627 p->max_samples = DEF_LOG_ENTRIES;
628 p->log = calloc(p->max_samples, log_entry_sz(l));
629 l->pending = p;
630 }
631
632 if (l->log_offset)
633 l->log_ddir_mask = LOG_OFFSET_SAMPLE_BIT;
634
635 INIT_FLIST_HEAD(&l->chunk_list);
636
637 if (l->log_gz && !p->td)
638 l->log_gz = 0;
639 else if (l->log_gz || l->log_gz_store) {
640 mutex_init_pshared(&l->chunk_lock);
641 mutex_init_pshared(&l->deferred_free_lock);
642 p->td->flags |= TD_F_COMPRESS_LOG;
643 }
644
645 *log = l;
646}
647
648#ifdef CONFIG_SETVBUF
649static void *set_file_buffer(FILE *f)
650{
651 size_t size = 1048576;
652 void *buf;
653
654 buf = malloc(size);
655 setvbuf(f, buf, _IOFBF, size);
656 return buf;
657}
658
659static void clear_file_buffer(void *buf)
660{
661 free(buf);
662}
663#else
664static void *set_file_buffer(FILE *f)
665{
666 return NULL;
667}
668
669static void clear_file_buffer(void *buf)
670{
671}
672#endif
673
674void free_log(struct io_log *log)
675{
676 while (!flist_empty(&log->io_logs)) {
677 struct io_logs *cur_log;
678
679 cur_log = flist_first_entry(&log->io_logs, struct io_logs, list);
680 flist_del_init(&cur_log->list);
681 free(cur_log->log);
682 sfree(cur_log);
683 }
684
685 if (log->pending) {
686 free(log->pending->log);
687 free(log->pending);
688 log->pending = NULL;
689 }
690
691 free(log->pending);
692 free(log->filename);
693 sfree(log);
694}
695
696uint64_t hist_sum(int j, int stride, uint64_t *io_u_plat,
697 uint64_t *io_u_plat_last)
698{
699 uint64_t sum;
700 int k;
701
702 if (io_u_plat_last) {
703 for (k = sum = 0; k < stride; k++)
704 sum += io_u_plat[j + k] - io_u_plat_last[j + k];
705 } else {
706 for (k = sum = 0; k < stride; k++)
707 sum += io_u_plat[j + k];
708 }
709
710 return sum;
711}
712
713static void flush_hist_samples(FILE *f, int hist_coarseness, void *samples,
714 uint64_t sample_size)
715{
716 struct io_sample *s;
717 int log_offset;
718 uint64_t i, j, nr_samples;
719 struct io_u_plat_entry *entry, *entry_before;
720 uint64_t *io_u_plat;
721 uint64_t *io_u_plat_before;
722
723 int stride = 1 << hist_coarseness;
724
725 if (!sample_size)
726 return;
727
728 s = __get_sample(samples, 0, 0);
729 log_offset = (s->__ddir & LOG_OFFSET_SAMPLE_BIT) != 0;
730
731 nr_samples = sample_size / __log_entry_sz(log_offset);
732
733 for (i = 0; i < nr_samples; i++) {
734 s = __get_sample(samples, log_offset, i);
735
736 entry = s->data.plat_entry;
737 io_u_plat = entry->io_u_plat;
738
739 entry_before = flist_first_entry(&entry->list, struct io_u_plat_entry, list);
740 io_u_plat_before = entry_before->io_u_plat;
741
742 fprintf(f, "%lu, %u, %u, ", (unsigned long) s->time,
743 io_sample_ddir(s), s->bs);
744 for (j = 0; j < FIO_IO_U_PLAT_NR - stride; j += stride) {
745 fprintf(f, "%llu, ", (unsigned long long)
746 hist_sum(j, stride, io_u_plat, io_u_plat_before));
747 }
748 fprintf(f, "%llu\n", (unsigned long long)
749 hist_sum(FIO_IO_U_PLAT_NR - stride, stride, io_u_plat,
750 io_u_plat_before));
751
752 flist_del(&entry_before->list);
753 free(entry_before);
754 }
755}
756
757void flush_samples(FILE *f, void *samples, uint64_t sample_size)
758{
759 struct io_sample *s;
760 int log_offset;
761 uint64_t i, nr_samples;
762
763 if (!sample_size)
764 return;
765
766 s = __get_sample(samples, 0, 0);
767 log_offset = (s->__ddir & LOG_OFFSET_SAMPLE_BIT) != 0;
768
769 nr_samples = sample_size / __log_entry_sz(log_offset);
770
771 for (i = 0; i < nr_samples; i++) {
772 s = __get_sample(samples, log_offset, i);
773
774 if (!log_offset) {
775 fprintf(f, "%lu, %" PRId64 ", %u, %u\n",
776 (unsigned long) s->time,
777 s->data.val,
778 io_sample_ddir(s), s->bs);
779 } else {
780 struct io_sample_offset *so = (void *) s;
781
782 fprintf(f, "%lu, %" PRId64 ", %u, %u, %llu\n",
783 (unsigned long) s->time,
784 s->data.val,
785 io_sample_ddir(s), s->bs,
786 (unsigned long long) so->offset);
787 }
788 }
789}
790
791#ifdef CONFIG_ZLIB
792
793struct iolog_flush_data {
794 struct workqueue_work work;
795 struct io_log *log;
796 void *samples;
797 uint32_t nr_samples;
798 bool free;
799};
800
801#define GZ_CHUNK 131072
802
803static struct iolog_compress *get_new_chunk(unsigned int seq)
804{
805 struct iolog_compress *c;
806
807 c = malloc(sizeof(*c));
808 INIT_FLIST_HEAD(&c->list);
809 c->buf = malloc(GZ_CHUNK);
810 c->len = 0;
811 c->seq = seq;
812 return c;
813}
814
815static void free_chunk(struct iolog_compress *ic)
816{
817 free(ic->buf);
818 free(ic);
819}
820
821static int z_stream_init(z_stream *stream, int gz_hdr)
822{
823 int wbits = 15;
824
825 memset(stream, 0, sizeof(*stream));
826 stream->zalloc = Z_NULL;
827 stream->zfree = Z_NULL;
828 stream->opaque = Z_NULL;
829 stream->next_in = Z_NULL;
830
831 /*
832 * zlib magic - add 32 for auto-detection of gz header or not,
833 * if we decide to store files in a gzip friendly format.
834 */
835 if (gz_hdr)
836 wbits += 32;
837
838 if (inflateInit2(stream, wbits) != Z_OK)
839 return 1;
840
841 return 0;
842}
843
844struct inflate_chunk_iter {
845 unsigned int seq;
846 int err;
847 void *buf;
848 size_t buf_size;
849 size_t buf_used;
850 size_t chunk_sz;
851};
852
853static void finish_chunk(z_stream *stream, FILE *f,
854 struct inflate_chunk_iter *iter)
855{
856 int ret;
857
858 ret = inflateEnd(stream);
859 if (ret != Z_OK)
860 log_err("fio: failed to end log inflation seq %d (%d)\n",
861 iter->seq, ret);
862
863 flush_samples(f, iter->buf, iter->buf_used);
864 free(iter->buf);
865 iter->buf = NULL;
866 iter->buf_size = iter->buf_used = 0;
867}
868
869/*
870 * Iterative chunk inflation. Handles cases where we cross into a new
871 * sequence, doing flush finish of previous chunk if needed.
872 */
873static size_t inflate_chunk(struct iolog_compress *ic, int gz_hdr, FILE *f,
874 z_stream *stream, struct inflate_chunk_iter *iter)
875{
876 size_t ret;
877
878 dprint(FD_COMPRESS, "inflate chunk size=%lu, seq=%u\n",
879 (unsigned long) ic->len, ic->seq);
880
881 if (ic->seq != iter->seq) {
882 if (iter->seq)
883 finish_chunk(stream, f, iter);
884
885 z_stream_init(stream, gz_hdr);
886 iter->seq = ic->seq;
887 }
888
889 stream->avail_in = ic->len;
890 stream->next_in = ic->buf;
891
892 if (!iter->buf_size) {
893 iter->buf_size = iter->chunk_sz;
894 iter->buf = malloc(iter->buf_size);
895 }
896
897 while (stream->avail_in) {
898 size_t this_out = iter->buf_size - iter->buf_used;
899 int err;
900
901 stream->avail_out = this_out;
902 stream->next_out = iter->buf + iter->buf_used;
903
904 err = inflate(stream, Z_NO_FLUSH);
905 if (err < 0) {
906 log_err("fio: failed inflating log: %d\n", err);
907 iter->err = err;
908 break;
909 }
910
911 iter->buf_used += this_out - stream->avail_out;
912
913 if (!stream->avail_out) {
914 iter->buf_size += iter->chunk_sz;
915 iter->buf = realloc(iter->buf, iter->buf_size);
916 continue;
917 }
918
919 if (err == Z_STREAM_END)
920 break;
921 }
922
923 ret = (void *) stream->next_in - ic->buf;
924
925 dprint(FD_COMPRESS, "inflated to size=%lu\n", (unsigned long) iter->buf_size);
926
927 return ret;
928}
929
930/*
931 * Inflate stored compressed chunks, or write them directly to the log
932 * file if so instructed.
933 */
934static int inflate_gz_chunks(struct io_log *log, FILE *f)
935{
936 struct inflate_chunk_iter iter = { .chunk_sz = log->log_gz, };
937 z_stream stream;
938
939 while (!flist_empty(&log->chunk_list)) {
940 struct iolog_compress *ic;
941
942 ic = flist_first_entry(&log->chunk_list, struct iolog_compress, list);
943 flist_del(&ic->list);
944
945 if (log->log_gz_store) {
946 size_t ret;
947
948 dprint(FD_COMPRESS, "log write chunk size=%lu, "
949 "seq=%u\n", (unsigned long) ic->len, ic->seq);
950
951 ret = fwrite(ic->buf, ic->len, 1, f);
952 if (ret != 1 || ferror(f)) {
953 iter.err = errno;
954 log_err("fio: error writing compressed log\n");
955 }
956 } else
957 inflate_chunk(ic, log->log_gz_store, f, &stream, &iter);
958
959 free_chunk(ic);
960 }
961
962 if (iter.seq) {
963 finish_chunk(&stream, f, &iter);
964 free(iter.buf);
965 }
966
967 return iter.err;
968}
969
970/*
971 * Open compressed log file and decompress the stored chunks and
972 * write them to stdout. The chunks are stored sequentially in the
973 * file, so we iterate over them and do them one-by-one.
974 */
975int iolog_file_inflate(const char *file)
976{
977 struct inflate_chunk_iter iter = { .chunk_sz = 64 * 1024 * 1024, };
978 struct iolog_compress ic;
979 z_stream stream;
980 struct stat sb;
981 ssize_t ret;
982 size_t total;
983 void *buf;
984 FILE *f;
985
986 f = fopen(file, "r");
987 if (!f) {
988 perror("fopen");
989 return 1;
990 }
991
992 if (stat(file, &sb) < 0) {
993 fclose(f);
994 perror("stat");
995 return 1;
996 }
997
998 ic.buf = buf = malloc(sb.st_size);
999 ic.len = sb.st_size;
1000 ic.seq = 1;
1001
1002 ret = fread(ic.buf, ic.len, 1, f);
1003 if (ret < 0) {
1004 perror("fread");
1005 fclose(f);
1006 free(buf);
1007 return 1;
1008 } else if (ret != 1) {
1009 log_err("fio: short read on reading log\n");
1010 fclose(f);
1011 free(buf);
1012 return 1;
1013 }
1014
1015 fclose(f);
1016
1017 /*
1018 * Each chunk will return Z_STREAM_END. We don't know how many
1019 * chunks are in the file, so we just keep looping and incrementing
1020 * the sequence number until we have consumed the whole compressed
1021 * file.
1022 */
1023 total = ic.len;
1024 do {
1025 size_t iret;
1026
1027 iret = inflate_chunk(&ic, 1, stdout, &stream, &iter);
1028 total -= iret;
1029 if (!total)
1030 break;
1031 if (iter.err)
1032 break;
1033
1034 ic.seq++;
1035 ic.len -= iret;
1036 ic.buf += iret;
1037 } while (1);
1038
1039 if (iter.seq) {
1040 finish_chunk(&stream, stdout, &iter);
1041 free(iter.buf);
1042 }
1043
1044 free(buf);
1045 return iter.err;
1046}
1047
1048#else
1049
1050static int inflate_gz_chunks(struct io_log *log, FILE *f)
1051{
1052 return 0;
1053}
1054
1055int iolog_file_inflate(const char *file)
1056{
1057 log_err("fio: log inflation not possible without zlib\n");
1058 return 1;
1059}
1060
1061#endif
1062
1063void flush_log(struct io_log *log, bool do_append)
1064{
1065 void *buf;
1066 FILE *f;
1067
1068 if (!do_append)
1069 f = fopen(log->filename, "w");
1070 else
1071 f = fopen(log->filename, "a");
1072 if (!f) {
1073 perror("fopen log");
1074 return;
1075 }
1076
1077 buf = set_file_buffer(f);
1078
1079 inflate_gz_chunks(log, f);
1080
1081 while (!flist_empty(&log->io_logs)) {
1082 struct io_logs *cur_log;
1083
1084 cur_log = flist_first_entry(&log->io_logs, struct io_logs, list);
1085 flist_del_init(&cur_log->list);
1086
1087 if (log->td && log == log->td->clat_hist_log)
1088 flush_hist_samples(f, log->hist_coarseness, cur_log->log,
1089 log_sample_sz(log, cur_log));
1090 else
1091 flush_samples(f, cur_log->log, log_sample_sz(log, cur_log));
1092
1093 sfree(cur_log);
1094 }
1095
1096 fclose(f);
1097 clear_file_buffer(buf);
1098}
1099
1100static int finish_log(struct thread_data *td, struct io_log *log, int trylock)
1101{
1102 if (td->flags & TD_F_COMPRESS_LOG)
1103 iolog_flush(log);
1104
1105 if (trylock) {
1106 if (fio_trylock_file(log->filename))
1107 return 1;
1108 } else
1109 fio_lock_file(log->filename);
1110
1111 if (td->client_type == FIO_CLIENT_TYPE_GUI || is_backend)
1112 fio_send_iolog(td, log, log->filename);
1113 else
1114 flush_log(log, !td->o.per_job_logs);
1115
1116 fio_unlock_file(log->filename);
1117 free_log(log);
1118 return 0;
1119}
1120
1121size_t log_chunk_sizes(struct io_log *log)
1122{
1123 struct flist_head *entry;
1124 size_t ret;
1125
1126 if (flist_empty(&log->chunk_list))
1127 return 0;
1128
1129 ret = 0;
1130 pthread_mutex_lock(&log->chunk_lock);
1131 flist_for_each(entry, &log->chunk_list) {
1132 struct iolog_compress *c;
1133
1134 c = flist_entry(entry, struct iolog_compress, list);
1135 ret += c->len;
1136 }
1137 pthread_mutex_unlock(&log->chunk_lock);
1138 return ret;
1139}
1140
1141#ifdef CONFIG_ZLIB
1142
1143static void iolog_put_deferred(struct io_log *log, void *ptr)
1144{
1145 if (!ptr)
1146 return;
1147
1148 pthread_mutex_lock(&log->deferred_free_lock);
1149 if (log->deferred < IOLOG_MAX_DEFER) {
1150 log->deferred_items[log->deferred] = ptr;
1151 log->deferred++;
1152 } else if (!fio_did_warn(FIO_WARN_IOLOG_DROP))
1153 log_err("fio: had to drop log entry free\n");
1154 pthread_mutex_unlock(&log->deferred_free_lock);
1155}
1156
1157static void iolog_free_deferred(struct io_log *log)
1158{
1159 int i;
1160
1161 if (!log->deferred)
1162 return;
1163
1164 pthread_mutex_lock(&log->deferred_free_lock);
1165
1166 for (i = 0; i < log->deferred; i++) {
1167 free(log->deferred_items[i]);
1168 log->deferred_items[i] = NULL;
1169 }
1170
1171 log->deferred = 0;
1172 pthread_mutex_unlock(&log->deferred_free_lock);
1173}
1174
1175static int gz_work(struct iolog_flush_data *data)
1176{
1177 struct iolog_compress *c = NULL;
1178 struct flist_head list;
1179 unsigned int seq;
1180 z_stream stream;
1181 size_t total = 0;
1182 int ret;
1183
1184 INIT_FLIST_HEAD(&list);
1185
1186 memset(&stream, 0, sizeof(stream));
1187 stream.zalloc = Z_NULL;
1188 stream.zfree = Z_NULL;
1189 stream.opaque = Z_NULL;
1190
1191 ret = deflateInit(&stream, Z_DEFAULT_COMPRESSION);
1192 if (ret != Z_OK) {
1193 log_err("fio: failed to init gz stream\n");
1194 goto err;
1195 }
1196
1197 seq = ++data->log->chunk_seq;
1198
1199 stream.next_in = (void *) data->samples;
1200 stream.avail_in = data->nr_samples * log_entry_sz(data->log);
1201
1202 dprint(FD_COMPRESS, "deflate input size=%lu, seq=%u, log=%s\n",
1203 (unsigned long) stream.avail_in, seq,
1204 data->log->filename);
1205 do {
1206 if (c)
1207 dprint(FD_COMPRESS, "seq=%d, chunk=%lu\n", seq,
1208 (unsigned long) c->len);
1209 c = get_new_chunk(seq);
1210 stream.avail_out = GZ_CHUNK;
1211 stream.next_out = c->buf;
1212 ret = deflate(&stream, Z_NO_FLUSH);
1213 if (ret < 0) {
1214 log_err("fio: deflate log (%d)\n", ret);
1215 free_chunk(c);
1216 goto err;
1217 }
1218
1219 c->len = GZ_CHUNK - stream.avail_out;
1220 flist_add_tail(&c->list, &list);
1221 total += c->len;
1222 } while (stream.avail_in);
1223
1224 stream.next_out = c->buf + c->len;
1225 stream.avail_out = GZ_CHUNK - c->len;
1226
1227 ret = deflate(&stream, Z_FINISH);
1228 if (ret < 0) {
1229 /*
1230 * Z_BUF_ERROR is special, it just means we need more
1231 * output space. We'll handle that below. Treat any other
1232 * error as fatal.
1233 */
1234 if (ret != Z_BUF_ERROR) {
1235 log_err("fio: deflate log (%d)\n", ret);
1236 flist_del(&c->list);
1237 free_chunk(c);
1238 goto err;
1239 }
1240 }
1241
1242 total -= c->len;
1243 c->len = GZ_CHUNK - stream.avail_out;
1244 total += c->len;
1245 dprint(FD_COMPRESS, "seq=%d, chunk=%lu\n", seq, (unsigned long) c->len);
1246
1247 if (ret != Z_STREAM_END) {
1248 do {
1249 c = get_new_chunk(seq);
1250 stream.avail_out = GZ_CHUNK;
1251 stream.next_out = c->buf;
1252 ret = deflate(&stream, Z_FINISH);
1253 c->len = GZ_CHUNK - stream.avail_out;
1254 total += c->len;
1255 flist_add_tail(&c->list, &list);
1256 dprint(FD_COMPRESS, "seq=%d, chunk=%lu\n", seq,
1257 (unsigned long) c->len);
1258 } while (ret != Z_STREAM_END);
1259 }
1260
1261 dprint(FD_COMPRESS, "deflated to size=%lu\n", (unsigned long) total);
1262
1263 ret = deflateEnd(&stream);
1264 if (ret != Z_OK)
1265 log_err("fio: deflateEnd %d\n", ret);
1266
1267 iolog_put_deferred(data->log, data->samples);
1268
1269 if (!flist_empty(&list)) {
1270 pthread_mutex_lock(&data->log->chunk_lock);
1271 flist_splice_tail(&list, &data->log->chunk_list);
1272 pthread_mutex_unlock(&data->log->chunk_lock);
1273 }
1274
1275 ret = 0;
1276done:
1277 if (data->free)
1278 sfree(data);
1279 return ret;
1280err:
1281 while (!flist_empty(&list)) {
1282 c = flist_first_entry(list.next, struct iolog_compress, list);
1283 flist_del(&c->list);
1284 free_chunk(c);
1285 }
1286 ret = 1;
1287 goto done;
1288}
1289
1290/*
1291 * Invoked from our compress helper thread, when logging would have exceeded
1292 * the specified memory limitation. Compresses the previously stored
1293 * entries.
1294 */
1295static int gz_work_async(struct submit_worker *sw, struct workqueue_work *work)
1296{
1297 return gz_work(container_of(work, struct iolog_flush_data, work));
1298}
1299
1300static int gz_init_worker(struct submit_worker *sw)
1301{
1302 struct thread_data *td = sw->wq->td;
1303
1304 if (!fio_option_is_set(&td->o, log_gz_cpumask))
1305 return 0;
1306
1307 if (fio_setaffinity(gettid(), td->o.log_gz_cpumask) == -1) {
1308 log_err("gz: failed to set CPU affinity\n");
1309 return 1;
1310 }
1311
1312 return 0;
1313}
1314
1315static struct workqueue_ops log_compress_wq_ops = {
1316 .fn = gz_work_async,
1317 .init_worker_fn = gz_init_worker,
1318 .nice = 1,
1319};
1320
1321int iolog_compress_init(struct thread_data *td, struct sk_out *sk_out)
1322{
1323 if (!(td->flags & TD_F_COMPRESS_LOG))
1324 return 0;
1325
1326 workqueue_init(td, &td->log_compress_wq, &log_compress_wq_ops, 1, sk_out);
1327 return 0;
1328}
1329
1330void iolog_compress_exit(struct thread_data *td)
1331{
1332 if (!(td->flags & TD_F_COMPRESS_LOG))
1333 return;
1334
1335 workqueue_exit(&td->log_compress_wq);
1336}
1337
1338/*
1339 * Queue work item to compress the existing log entries. We reset the
1340 * current log to a small size, and reference the existing log in the
1341 * data that we queue for compression. Once compression has been done,
1342 * this old log is freed. If called with finish == true, will not return
1343 * until the log compression has completed, and will flush all previous
1344 * logs too
1345 */
1346static int iolog_flush(struct io_log *log)
1347{
1348 struct iolog_flush_data *data;
1349
1350 data = malloc(sizeof(*data));
1351 if (!data)
1352 return 1;
1353
1354 data->log = log;
1355 data->free = false;
1356
1357 while (!flist_empty(&log->io_logs)) {
1358 struct io_logs *cur_log;
1359
1360 cur_log = flist_first_entry(&log->io_logs, struct io_logs, list);
1361 flist_del_init(&cur_log->list);
1362
1363 data->samples = cur_log->log;
1364 data->nr_samples = cur_log->nr_samples;
1365
1366 sfree(cur_log);
1367
1368 gz_work(data);
1369 }
1370
1371 free(data);
1372 return 0;
1373}
1374
1375int iolog_cur_flush(struct io_log *log, struct io_logs *cur_log)
1376{
1377 struct iolog_flush_data *data;
1378
1379 data = smalloc(sizeof(*data));
1380 if (!data)
1381 return 1;
1382
1383 data->log = log;
1384
1385 data->samples = cur_log->log;
1386 data->nr_samples = cur_log->nr_samples;
1387 data->free = true;
1388
1389 cur_log->nr_samples = cur_log->max_samples = 0;
1390 cur_log->log = NULL;
1391
1392 workqueue_enqueue(&log->td->log_compress_wq, &data->work);
1393
1394 iolog_free_deferred(log);
1395
1396 return 0;
1397}
1398#else
1399
1400static int iolog_flush(struct io_log *log)
1401{
1402 return 1;
1403}
1404
1405int iolog_cur_flush(struct io_log *log, struct io_logs *cur_log)
1406{
1407 return 1;
1408}
1409
1410int iolog_compress_init(struct thread_data *td, struct sk_out *sk_out)
1411{
1412 return 0;
1413}
1414
1415void iolog_compress_exit(struct thread_data *td)
1416{
1417}
1418
1419#endif
1420
1421struct io_logs *iolog_cur_log(struct io_log *log)
1422{
1423 if (flist_empty(&log->io_logs))
1424 return NULL;
1425
1426 return flist_last_entry(&log->io_logs, struct io_logs, list);
1427}
1428
1429uint64_t iolog_nr_samples(struct io_log *iolog)
1430{
1431 struct flist_head *entry;
1432 uint64_t ret = 0;
1433
1434 flist_for_each(entry, &iolog->io_logs) {
1435 struct io_logs *cur_log;
1436
1437 cur_log = flist_entry(entry, struct io_logs, list);
1438 ret += cur_log->nr_samples;
1439 }
1440
1441 return ret;
1442}
1443
1444static int __write_log(struct thread_data *td, struct io_log *log, int try)
1445{
1446 if (log)
1447 return finish_log(td, log, try);
1448
1449 return 0;
1450}
1451
1452static int write_iops_log(struct thread_data *td, int try, bool unit_log)
1453{
1454 int ret;
1455
1456 if (per_unit_log(td->iops_log) != unit_log)
1457 return 0;
1458
1459 ret = __write_log(td, td->iops_log, try);
1460 if (!ret)
1461 td->iops_log = NULL;
1462
1463 return ret;
1464}
1465
1466static int write_slat_log(struct thread_data *td, int try, bool unit_log)
1467{
1468 int ret;
1469
1470 if (!unit_log)
1471 return 0;
1472
1473 ret = __write_log(td, td->slat_log, try);
1474 if (!ret)
1475 td->slat_log = NULL;
1476
1477 return ret;
1478}
1479
1480static int write_clat_log(struct thread_data *td, int try, bool unit_log)
1481{
1482 int ret;
1483
1484 if (!unit_log)
1485 return 0;
1486
1487 ret = __write_log(td, td->clat_log, try);
1488 if (!ret)
1489 td->clat_log = NULL;
1490
1491 return ret;
1492}
1493
1494static int write_clat_hist_log(struct thread_data *td, int try, bool unit_log)
1495{
1496 int ret;
1497
1498 if (!unit_log)
1499 return 0;
1500
1501 ret = __write_log(td, td->clat_hist_log, try);
1502 if (!ret)
1503 td->clat_hist_log = NULL;
1504
1505 return ret;
1506}
1507
1508static int write_lat_log(struct thread_data *td, int try, bool unit_log)
1509{
1510 int ret;
1511
1512 if (!unit_log)
1513 return 0;
1514
1515 ret = __write_log(td, td->lat_log, try);
1516 if (!ret)
1517 td->lat_log = NULL;
1518
1519 return ret;
1520}
1521
1522static int write_bandw_log(struct thread_data *td, int try, bool unit_log)
1523{
1524 int ret;
1525
1526 if (per_unit_log(td->bw_log) != unit_log)
1527 return 0;
1528
1529 ret = __write_log(td, td->bw_log, try);
1530 if (!ret)
1531 td->bw_log = NULL;
1532
1533 return ret;
1534}
1535
1536enum {
1537 BW_LOG_MASK = 1,
1538 LAT_LOG_MASK = 2,
1539 SLAT_LOG_MASK = 4,
1540 CLAT_LOG_MASK = 8,
1541 IOPS_LOG_MASK = 16,
1542 CLAT_HIST_LOG_MASK = 32,
1543
1544 ALL_LOG_NR = 6,
1545};
1546
1547struct log_type {
1548 unsigned int mask;
1549 int (*fn)(struct thread_data *, int, bool);
1550};
1551
1552static struct log_type log_types[] = {
1553 {
1554 .mask = BW_LOG_MASK,
1555 .fn = write_bandw_log,
1556 },
1557 {
1558 .mask = LAT_LOG_MASK,
1559 .fn = write_lat_log,
1560 },
1561 {
1562 .mask = SLAT_LOG_MASK,
1563 .fn = write_slat_log,
1564 },
1565 {
1566 .mask = CLAT_LOG_MASK,
1567 .fn = write_clat_log,
1568 },
1569 {
1570 .mask = IOPS_LOG_MASK,
1571 .fn = write_iops_log,
1572 },
1573 {
1574 .mask = CLAT_HIST_LOG_MASK,
1575 .fn = write_clat_hist_log,
1576 }
1577};
1578
1579void td_writeout_logs(struct thread_data *td, bool unit_logs)
1580{
1581 unsigned int log_mask = 0;
1582 unsigned int log_left = ALL_LOG_NR;
1583 int old_state, i;
1584
1585 old_state = td_bump_runstate(td, TD_FINISHING);
1586
1587 finalize_logs(td, unit_logs);
1588
1589 while (log_left) {
1590 int prev_log_left = log_left;
1591
1592 for (i = 0; i < ALL_LOG_NR && log_left; i++) {
1593 struct log_type *lt = &log_types[i];
1594 int ret;
1595
1596 if (!(log_mask & lt->mask)) {
1597 ret = lt->fn(td, log_left != 1, unit_logs);
1598 if (!ret) {
1599 log_left--;
1600 log_mask |= lt->mask;
1601 }
1602 }
1603 }
1604
1605 if (prev_log_left == log_left)
1606 usleep(5000);
1607 }
1608
1609 td_restore_runstate(td, old_state);
1610}
1611
1612void fio_writeout_logs(bool unit_logs)
1613{
1614 struct thread_data *td;
1615 int i;
1616
1617 for_each_td(td, i)
1618 td_writeout_logs(td, unit_logs);
1619}