Merge branch 'master' of ssh://git.kernel.dk/data/git/fio
[fio.git] / iolog.c
1 /*
2  * Code related to writing an iolog of what a thread is doing, and to
3  * later read that back and replay
4  */
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <libgen.h>
8 #include <assert.h>
9 #include "flist.h"
10 #include "fio.h"
11 #include "verify.h"
12 #include "trim.h"
13 #include "filelock.h"
14
15 static const char iolog_ver2[] = "fio version 2 iolog";
16
17 void queue_io_piece(struct thread_data *td, struct io_piece *ipo)
18 {
19         flist_add_tail(&ipo->list, &td->io_log_list);
20         td->total_io_size += ipo->len;
21 }
22
23 void log_io_u(struct thread_data *td, struct io_u *io_u)
24 {
25         const char *act[] = { "read", "write", "sync", "datasync",
26                                 "sync_file_range", "wait", "trim" };
27
28         assert(io_u->ddir <= 6);
29
30         if (!td->o.write_iolog_file)
31                 return;
32
33         fprintf(td->iolog_f, "%s %s %llu %lu\n", io_u->file->file_name,
34                                                 act[io_u->ddir], io_u->offset,
35                                                 io_u->buflen);
36 }
37
38 void log_file(struct thread_data *td, struct fio_file *f,
39               enum file_log_act what)
40 {
41         const char *act[] = { "add", "open", "close" };
42
43         assert(what < 3);
44
45         if (!td->o.write_iolog_file)
46                 return;
47
48
49         /*
50          * this happens on the pre-open/close done before the job starts
51          */
52         if (!td->iolog_f)
53                 return;
54
55         fprintf(td->iolog_f, "%s %s\n", f->file_name, act[what]);
56 }
57
58 static void iolog_delay(struct thread_data *td, unsigned long delay)
59 {
60         unsigned long usec = utime_since_now(&td->last_issue);
61
62         if (delay < usec)
63                 return;
64
65         delay -= usec;
66
67         /*
68          * less than 100 usec delay, just regard it as noise
69          */
70         if (delay < 100)
71                 return;
72
73         usec_sleep(td, delay);
74 }
75
76 static int ipo_special(struct thread_data *td, struct io_piece *ipo)
77 {
78         struct fio_file *f;
79         int ret;
80
81         /*
82          * Not a special ipo
83          */
84         if (ipo->ddir != DDIR_INVAL)
85                 return 0;
86
87         f = td->files[ipo->fileno];
88
89         switch (ipo->file_action) {
90         case FIO_LOG_OPEN_FILE:
91                 ret = td_io_open_file(td, f);
92                 if (!ret)
93                         break;
94                 td_verror(td, ret, "iolog open file");
95                 return -1;
96         case FIO_LOG_CLOSE_FILE:
97                 td_io_close_file(td, f);
98                 break;
99         case FIO_LOG_UNLINK_FILE:
100                 unlink(f->file_name);
101                 break;
102         default:
103                 log_err("fio: bad file action %d\n", ipo->file_action);
104                 break;
105         }
106
107         return 1;
108 }
109
110 int read_iolog_get(struct thread_data *td, struct io_u *io_u)
111 {
112         struct io_piece *ipo;
113         unsigned long elapsed;
114
115         while (!flist_empty(&td->io_log_list)) {
116                 int ret;
117
118                 ipo = flist_entry(td->io_log_list.next, struct io_piece, list);
119                 flist_del(&ipo->list);
120                 remove_trim_entry(td, ipo);
121
122                 ret = ipo_special(td, ipo);
123                 if (ret < 0) {
124                         free(ipo);
125                         break;
126                 } else if (ret > 0) {
127                         free(ipo);
128                         continue;
129                 }
130
131                 io_u->ddir = ipo->ddir;
132                 if (ipo->ddir != DDIR_WAIT) {
133                         io_u->offset = ipo->offset;
134                         io_u->buflen = ipo->len;
135                         io_u->file = td->files[ipo->fileno];
136                         get_file(io_u->file);
137                         dprint(FD_IO, "iolog: get %llu/%lu/%s\n", io_u->offset,
138                                                 io_u->buflen, io_u->file->file_name);
139                         if (ipo->delay)
140                                 iolog_delay(td, ipo->delay);
141                 } else {
142                         elapsed = mtime_since_genesis();
143                         if (ipo->delay > elapsed)
144                                 usec_sleep(td, (ipo->delay - elapsed) * 1000);
145                 }
146
147                 free(ipo);
148
149                 if (io_u->ddir != DDIR_WAIT)
150                         return 0;
151         }
152
153         td->done = 1;
154         return 1;
155 }
156
157 void prune_io_piece_log(struct thread_data *td)
158 {
159         struct io_piece *ipo;
160         struct rb_node *n;
161
162         while ((n = rb_first(&td->io_hist_tree)) != NULL) {
163                 ipo = rb_entry(n, struct io_piece, rb_node);
164                 rb_erase(n, &td->io_hist_tree);
165                 remove_trim_entry(td, ipo);
166                 td->io_hist_len--;
167                 free(ipo);
168         }
169
170         while (!flist_empty(&td->io_hist_list)) {
171                 ipo = flist_entry(td->io_hist_list.next, struct io_piece, list);
172                 flist_del(&ipo->list);
173                 remove_trim_entry(td, ipo);
174                 td->io_hist_len--;
175                 free(ipo);
176         }
177 }
178
179 /*
180  * log a successful write, so we can unwind the log for verify
181  */
182 void log_io_piece(struct thread_data *td, struct io_u *io_u)
183 {
184         struct rb_node **p, *parent;
185         struct io_piece *ipo, *__ipo;
186
187         ipo = malloc(sizeof(struct io_piece));
188         init_ipo(ipo);
189         ipo->file = io_u->file;
190         ipo->offset = io_u->offset;
191         ipo->len = io_u->buflen;
192         ipo->numberio = io_u->numberio;
193         ipo->flags = IP_F_IN_FLIGHT;
194
195         io_u->ipo = ipo;
196
197         if (io_u_should_trim(td, io_u)) {
198                 flist_add_tail(&ipo->trim_list, &td->trim_list);
199                 td->trim_entries++;
200         }
201
202         /*
203          * We don't need to sort the entries, if:
204          *
205          *      Sequential writes, or
206          *      Random writes that lay out the file as it goes along
207          *
208          * For both these cases, just reading back data in the order we
209          * wrote it out is the fastest.
210          *
211          * One exception is if we don't have a random map AND we are doing
212          * verifies, in that case we need to check for duplicate blocks and
213          * drop the old one, which we rely on the rb insert/lookup for
214          * handling.
215          */
216         if (((!td->o.verifysort) || !td_random(td) || !td->o.overwrite) &&
217               (file_randommap(td, ipo->file) || td->o.verify == VERIFY_NONE)) {
218                 INIT_FLIST_HEAD(&ipo->list);
219                 flist_add_tail(&ipo->list, &td->io_hist_list);
220                 ipo->flags |= IP_F_ONLIST;
221                 td->io_hist_len++;
222                 return;
223         }
224
225         RB_CLEAR_NODE(&ipo->rb_node);
226
227         /*
228          * Sort the entry into the verification list
229          */
230 restart:
231         p = &td->io_hist_tree.rb_node;
232         parent = NULL;
233         while (*p) {
234                 parent = *p;
235
236                 __ipo = rb_entry(parent, struct io_piece, rb_node);
237                 if (ipo->file < __ipo->file)
238                         p = &(*p)->rb_left;
239                 else if (ipo->file > __ipo->file)
240                         p = &(*p)->rb_right;
241                 else if (ipo->offset < __ipo->offset)
242                         p = &(*p)->rb_left;
243                 else if (ipo->offset > __ipo->offset)
244                         p = &(*p)->rb_right;
245                 else {
246                         dprint(FD_IO, "iolog: overlap %llu/%lu, %llu/%lu",
247                                 __ipo->offset, __ipo->len,
248                                 ipo->offset, ipo->len);
249                         td->io_hist_len--;
250                         rb_erase(parent, &td->io_hist_tree);
251                         remove_trim_entry(td, __ipo);
252                         free(__ipo);
253                         goto restart;
254                 }
255         }
256
257         rb_link_node(&ipo->rb_node, parent, p);
258         rb_insert_color(&ipo->rb_node, &td->io_hist_tree);
259         ipo->flags |= IP_F_ONRB;
260         td->io_hist_len++;
261 }
262
263 void write_iolog_close(struct thread_data *td)
264 {
265         fflush(td->iolog_f);
266         fclose(td->iolog_f);
267         free(td->iolog_buf);
268         td->iolog_f = NULL;
269         td->iolog_buf = NULL;
270 }
271
272 /*
273  * Read version 2 iolog data. It is enhanced to include per-file logging,
274  * syncs, etc.
275  */
276 static int read_iolog2(struct thread_data *td, FILE *f)
277 {
278         unsigned long long offset;
279         unsigned int bytes;
280         int reads, writes, waits, fileno = 0, file_action = 0; /* stupid gcc */
281         char *fname, *act;
282         char *str, *p;
283         enum fio_ddir rw;
284
285         free_release_files(td);
286
287         /*
288          * Read in the read iolog and store it, reuse the infrastructure
289          * for doing verifications.
290          */
291         str = malloc(4096);
292         fname = malloc(256+16);
293         act = malloc(256+16);
294
295         reads = writes = waits = 0;
296         while ((p = fgets(str, 4096, f)) != NULL) {
297                 struct io_piece *ipo;
298                 int r;
299
300                 r = sscanf(p, "%256s %256s %llu %u", fname, act, &offset,
301                                                                         &bytes);
302                 if (r == 4) {
303                         /*
304                          * Check action first
305                          */
306                         if (!strcmp(act, "wait"))
307                                 rw = DDIR_WAIT;
308                         else if (!strcmp(act, "read"))
309                                 rw = DDIR_READ;
310                         else if (!strcmp(act, "write"))
311                                 rw = DDIR_WRITE;
312                         else if (!strcmp(act, "sync"))
313                                 rw = DDIR_SYNC;
314                         else if (!strcmp(act, "datasync"))
315                                 rw = DDIR_DATASYNC;
316                         else if (!strcmp(act, "trim"))
317                                 rw = DDIR_TRIM;
318                         else {
319                                 log_err("fio: bad iolog file action: %s\n",
320                                                                         act);
321                                 continue;
322                         }
323                         fileno = get_fileno(td, fname);
324                 } else if (r == 2) {
325                         rw = DDIR_INVAL;
326                         if (!strcmp(act, "add")) {
327                                 fileno = add_file(td, fname, 0, 1);
328                                 file_action = FIO_LOG_ADD_FILE;
329                                 continue;
330                         } else if (!strcmp(act, "open")) {
331                                 fileno = get_fileno(td, fname);
332                                 file_action = FIO_LOG_OPEN_FILE;
333                         } else if (!strcmp(act, "close")) {
334                                 fileno = get_fileno(td, fname);
335                                 file_action = FIO_LOG_CLOSE_FILE;
336                         } else {
337                                 log_err("fio: bad iolog file action: %s\n",
338                                                                         act);
339                                 continue;
340                         }
341                 } else {
342                         log_err("bad iolog2: %s", p);
343                         continue;
344                 }
345
346                 if (rw == DDIR_READ)
347                         reads++;
348                 else if (rw == DDIR_WRITE) {
349                         /*
350                          * Don't add a write for ro mode
351                          */
352                         if (read_only)
353                                 continue;
354                         writes++;
355                 } else if (rw == DDIR_WAIT) {
356                         waits++;
357                 } else if (rw == DDIR_INVAL) {
358                 } else if (!ddir_sync(rw)) {
359                         log_err("bad ddir: %d\n", rw);
360                         continue;
361                 }
362
363                 /*
364                  * Make note of file
365                  */
366                 ipo = malloc(sizeof(*ipo));
367                 init_ipo(ipo);
368                 ipo->ddir = rw;
369                 if (rw == DDIR_WAIT) {
370                         ipo->delay = offset;
371                 } else {
372                         ipo->offset = offset;
373                         ipo->len = bytes;
374                         if (bytes > td->o.max_bs[rw])
375                                 td->o.max_bs[rw] = bytes;
376                         ipo->fileno = fileno;
377                         ipo->file_action = file_action;
378                 }
379
380                 queue_io_piece(td, ipo);
381         }
382
383         free(str);
384         free(act);
385         free(fname);
386
387         if (writes && read_only) {
388                 log_err("fio: <%s> skips replay of %d writes due to"
389                         " read-only\n", td->o.name, writes);
390                 writes = 0;
391         }
392
393         if (!reads && !writes && !waits)
394                 return 1;
395         else if (reads && !writes)
396                 td->o.td_ddir = TD_DDIR_READ;
397         else if (!reads && writes)
398                 td->o.td_ddir = TD_DDIR_WRITE;
399         else
400                 td->o.td_ddir = TD_DDIR_RW;
401
402         return 0;
403 }
404
405 /*
406  * open iolog, check version, and call appropriate parser
407  */
408 static int init_iolog_read(struct thread_data *td)
409 {
410         char buffer[256], *p;
411         FILE *f;
412         int ret;
413
414         f = fopen(td->o.read_iolog_file, "r");
415         if (!f) {
416                 perror("fopen read iolog");
417                 return 1;
418         }
419
420         p = fgets(buffer, sizeof(buffer), f);
421         if (!p) {
422                 td_verror(td, errno, "iolog read");
423                 log_err("fio: unable to read iolog\n");
424                 fclose(f);
425                 return 1;
426         }
427
428         /*
429          * version 2 of the iolog stores a specific string as the
430          * first line, check for that
431          */
432         if (!strncmp(iolog_ver2, buffer, strlen(iolog_ver2)))
433                 ret = read_iolog2(td, f);
434         else {
435                 log_err("fio: iolog version 1 is no longer supported\n");
436                 ret = 1;
437         }
438
439         fclose(f);
440         return ret;
441 }
442
443 /*
444  * Set up a log for storing io patterns.
445  */
446 static int init_iolog_write(struct thread_data *td)
447 {
448         struct fio_file *ff;
449         FILE *f;
450         unsigned int i;
451
452         f = fopen(td->o.write_iolog_file, "a");
453         if (!f) {
454                 perror("fopen write iolog");
455                 return 1;
456         }
457
458         /*
459          * That's it for writing, setup a log buffer and we're done.
460           */
461         td->iolog_f = f;
462         td->iolog_buf = malloc(8192);
463         setvbuf(f, td->iolog_buf, _IOFBF, 8192);
464
465         /*
466          * write our version line
467          */
468         if (fprintf(f, "%s\n", iolog_ver2) < 0) {
469                 perror("iolog init\n");
470                 return 1;
471         }
472
473         /*
474          * add all known files
475          */
476         for_each_file(td, ff, i)
477                 log_file(td, ff, FIO_LOG_ADD_FILE);
478
479         return 0;
480 }
481
482 int init_iolog(struct thread_data *td)
483 {
484         int ret = 0;
485
486         if (td->o.read_iolog_file) {
487                 int need_swap;
488
489                 /*
490                  * Check if it's a blktrace file and load that if possible.
491                  * Otherwise assume it's a normal log file and load that.
492                  */
493                 if (is_blktrace(td->o.read_iolog_file, &need_swap))
494                         ret = load_blktrace(td, td->o.read_iolog_file, need_swap);
495                 else
496                         ret = init_iolog_read(td);
497         } else if (td->o.write_iolog_file)
498                 ret = init_iolog_write(td);
499
500         if (ret)
501                 td_verror(td, EINVAL, "failed initializing iolog");
502
503         return ret;
504 }
505
506 void setup_log(struct io_log **log, unsigned long avg_msec, int log_type)
507 {
508         struct io_log *l = malloc(sizeof(*l));
509
510         memset(l, 0, sizeof(*l));
511         l->nr_samples = 0;
512         l->max_samples = 1024;
513         l->log_type = log_type;
514         l->log = malloc(l->max_samples * sizeof(struct io_sample));
515         l->avg_msec = avg_msec;
516         *log = l;
517 }
518
519 #ifdef CONFIG_SETVBUF
520 static void *set_file_buffer(FILE *f)
521 {
522         size_t size = 1048576;
523         void *buf;
524
525         buf = malloc(size);
526         setvbuf(f, buf, _IOFBF, size);
527         return buf;
528 }
529
530 static void clear_file_buffer(void *buf)
531 {
532         free(buf);
533 }
534 #else
535 static void *set_file_buffer(FILE *f)
536 {
537         return NULL;
538 }
539
540 static void clear_file_buffer(void *buf)
541 {
542 }
543 #endif
544
545 void __finish_log(struct io_log *log, const char *name)
546 {
547         unsigned int i;
548         void *buf;
549         FILE *f;
550
551         f = fopen(name, "a");
552         if (!f) {
553                 perror("fopen log");
554                 return;
555         }
556
557         buf = set_file_buffer(f);
558
559         for (i = 0; i < log->nr_samples; i++) {
560                 fprintf(f, "%lu, %lu, %u, %u\n",
561                                 (unsigned long) log->log[i].time,
562                                 (unsigned long) log->log[i].val,
563                                 log->log[i].ddir, log->log[i].bs);
564         }
565
566         fclose(f);
567         clear_file_buffer(buf);
568         free(log->log);
569         free(log);
570 }
571
572 static int finish_log_named(struct thread_data *td, struct io_log *log,
573                             const char *prefix, const char *postfix,
574                             int trylock)
575 {
576         char file_name[256];
577
578         snprintf(file_name, sizeof(file_name), "%s_%s.log", prefix, postfix);
579
580         if (trylock) {
581                 if (fio_trylock_file(file_name))
582                         return 1;
583         } else
584                 fio_lock_file(file_name);
585
586         if (td->client_type == FIO_CLIENT_TYPE_GUI) {
587                 fio_send_iolog(td, log, file_name);
588                 free(log->log);
589                 free(log);
590         } else
591                 __finish_log(log, file_name);
592
593         fio_unlock_file(file_name);
594         return 0;
595 }
596
597 static int finish_log(struct thread_data *td, struct io_log *log,
598                       const char *name, int trylock)
599 {
600         return finish_log_named(td, log, td->o.name, name, trylock);
601 }
602
603 static int write_this_log(struct thread_data *td, struct io_log *log,
604                           const char *log_file, const char *name, int try)
605 {
606         int ret;
607
608         if (!log)
609                 return 0;
610
611         if (log_file)
612                 ret = finish_log_named(td, log, log_file, name, try);
613         else
614                 ret = finish_log(td, log, name, try);
615
616         return ret;
617 }
618
619 static int write_iops_log(struct thread_data *td, int try)
620 {
621         struct thread_options *o = &td->o;
622
623         return write_this_log(td, td->iops_log, o->iops_log_file, "iops", try);
624 }
625
626 static int write_slat_log(struct thread_data *td, int try)
627 {
628         struct thread_options *o = &td->o;
629
630         return write_this_log(td, td->slat_log, o->lat_log_file, "slat", try);
631 }
632
633 static int write_clat_log(struct thread_data *td, int try)
634 {
635         struct thread_options *o = &td->o;
636
637         return write_this_log(td, td->clat_log, o->lat_log_file, "clat" , try);
638 }
639
640 static int write_lat_log(struct thread_data *td, int try)
641 {
642         struct thread_options *o = &td->o;
643
644         return write_this_log(td, td->lat_log, o->lat_log_file, "lat", try);
645 }
646
647 static int write_bandw_log(struct thread_data *td, int try)
648 {
649         struct thread_options *o = &td->o;
650
651         return write_this_log(td, td->bw_log, o->bw_log_file, "bw", try);
652 }
653
654 enum {
655         BW_LOG_MASK     = 1,
656         LAT_LOG_MASK    = 2,
657         SLAT_LOG_MASK   = 4,
658         CLAT_LOG_MASK   = 8,
659         IOPS_LOG_MASK   = 16,
660
661         ALL_LOG_NR      = 5,
662 };
663
664 struct log_type {
665         unsigned int mask;
666         int (*fn)(struct thread_data *, int);
667 };
668
669 static struct log_type log_types[] = {
670         {
671                 .mask   = BW_LOG_MASK,
672                 .fn     = write_bandw_log,
673         },
674         {
675                 .mask   = LAT_LOG_MASK,
676                 .fn     = write_lat_log,
677         },
678         {
679                 .mask   = SLAT_LOG_MASK,
680                 .fn     = write_slat_log,
681         },
682         {
683                 .mask   = CLAT_LOG_MASK,
684                 .fn     = write_clat_log,
685         },
686         {
687                 .mask   = IOPS_LOG_MASK,
688                 .fn     = write_iops_log,
689         },
690 };
691
692 void fio_writeout_logs(struct thread_data *td)
693 {
694         unsigned int log_mask = 0;
695         unsigned int log_left = ALL_LOG_NR;
696         int old_state, i;
697
698         old_state = td_bump_runstate(td, TD_FINISHING);
699
700         finalize_logs(td);
701
702         while (log_left) {
703                 int prev_log_left = log_left;
704
705                 for (i = 0; i < ALL_LOG_NR && log_left; i++) {
706                         struct log_type *lt = &log_types[i];
707                         int ret;
708
709                         if (!(log_mask & lt->mask)) {
710                                 ret = lt->fn(td, log_left != 1);
711                                 if (!ret) {
712                                         log_left--;
713                                         log_mask |= lt->mask;
714                                 }
715                         }
716                 }
717
718                 if (prev_log_left == log_left)
719                         usleep(5000);
720         }
721
722         td_restore_runstate(td, old_state);
723 }