Token-based flow control
[fio.git] / diskutil.c
1 #include <stdio.h>
2 #include <string.h>
3 #include <sys/time.h>
4 #include <sys/types.h>
5 #include <sys/stat.h>
6 #include <dirent.h>
7 #include <libgen.h>
8 #include <math.h>
9
10 #include "fio.h"
11 #include "smalloc.h"
12 #include "diskutil.h"
13
14 static int last_majdev, last_mindev;
15 static struct disk_util *last_du;
16
17 FLIST_HEAD(disk_list);
18
19 static struct disk_util *__init_per_file_disk_util(struct thread_data *td,
20                 int majdev, int mindev, char *path);
21
22 static void disk_util_free(struct disk_util *du)
23 {
24         if (du == last_du)
25                 last_du = NULL;
26
27         while (!flist_empty(&du->slaves)) {
28                 struct disk_util *slave;
29
30                 slave = flist_entry(du->slaves.next, struct disk_util, slavelist);
31                 flist_del(&slave->slavelist);
32                 slave->users--;
33         }
34         
35         fio_mutex_remove(du->lock);
36         sfree(du);
37 }
38
39 static int get_io_ticks(struct disk_util *du, struct disk_util_stat *dus)
40 {
41         unsigned in_flight;
42         unsigned long long sectors[2];
43         char line[256];
44         FILE *f;
45         char *p;
46         int ret;
47
48         dprint(FD_DISKUTIL, "open stat file: %s\n", du->path);
49
50         f = fopen(du->path, "r");
51         if (!f)
52                 return 1;
53
54         p = fgets(line, sizeof(line), f);
55         if (!p) {
56                 fclose(f);
57                 return 1;
58         }
59
60         dprint(FD_DISKUTIL, "%s: %s", du->path, p);
61
62         ret = sscanf(p, "%u %u %llu %u %u %u %llu %u %u %u %u\n", &dus->ios[0],
63                                         &dus->merges[0], &sectors[0],
64                                         &dus->ticks[0], &dus->ios[1],
65                                         &dus->merges[1], &sectors[1],
66                                         &dus->ticks[1], &in_flight,
67                                         &dus->io_ticks, &dus->time_in_queue);
68         fclose(f);
69         dprint(FD_DISKUTIL, "%s: stat read ok? %d\n", du->path, ret == 1);
70         dus->sectors[0] = sectors[0];
71         dus->sectors[1] = sectors[1];
72         return ret != 11;
73 }
74
75 static void update_io_tick_disk(struct disk_util *du)
76 {
77         struct disk_util_stat __dus, *dus, *ldus;
78         struct timeval t;
79
80         if (!du->users)
81                 return;
82         if (get_io_ticks(du, &__dus))
83                 return;
84
85         dus = &du->dus;
86         ldus = &du->last_dus;
87
88         dus->sectors[0] += (__dus.sectors[0] - ldus->sectors[0]);
89         dus->sectors[1] += (__dus.sectors[1] - ldus->sectors[1]);
90         dus->ios[0] += (__dus.ios[0] - ldus->ios[0]);
91         dus->ios[1] += (__dus.ios[1] - ldus->ios[1]);
92         dus->merges[0] += (__dus.merges[0] - ldus->merges[0]);
93         dus->merges[1] += (__dus.merges[1] - ldus->merges[1]);
94         dus->ticks[0] += (__dus.ticks[0] - ldus->ticks[0]);
95         dus->ticks[1] += (__dus.ticks[1] - ldus->ticks[1]);
96         dus->io_ticks += (__dus.io_ticks - ldus->io_ticks);
97         dus->time_in_queue += (__dus.time_in_queue - ldus->time_in_queue);
98
99         fio_gettime(&t, NULL);
100         dus->msec += mtime_since(&du->time, &t);
101         memcpy(&du->time, &t, sizeof(t));
102         memcpy(ldus, &__dus, sizeof(__dus));
103 }
104
105 void update_io_ticks(void)
106 {
107         struct flist_head *entry;
108         struct disk_util *du;
109
110         dprint(FD_DISKUTIL, "update io ticks\n");
111
112         flist_for_each(entry, &disk_list) {
113                 du = flist_entry(entry, struct disk_util, list);
114                 update_io_tick_disk(du);
115         }
116 }
117
118 static struct disk_util *disk_util_exists(int major, int minor)
119 {
120         struct flist_head *entry;
121         struct disk_util *du;
122
123         flist_for_each(entry, &disk_list) {
124                 du = flist_entry(entry, struct disk_util, list);
125
126                 if (major == du->major && minor == du->minor)
127                         return du;
128         }
129
130         return NULL;
131 }
132
133 static int get_device_numbers(char *file_name, int *maj, int *min)
134 {
135         struct stat st;
136         int majdev, mindev;
137         char tempname[PATH_MAX], *p;
138
139         if (!lstat(file_name, &st)) {
140                 if (S_ISBLK(st.st_mode)) {
141                         majdev = major(st.st_rdev);
142                         mindev = minor(st.st_rdev);
143                 } else if (S_ISCHR(st.st_mode)) {
144                         majdev = major(st.st_rdev);
145                         mindev = minor(st.st_rdev);
146                         if (fio_lookup_raw(st.st_rdev, &majdev, &mindev))
147                                 return -1;
148                 } else if (S_ISFIFO(st.st_mode))
149                         return -1;
150                 else {
151                         majdev = major(st.st_dev);
152                         mindev = minor(st.st_dev);
153                 }
154         } else {
155                 /*
156                  * must be a file, open "." in that path
157                  */
158                 strncpy(tempname, file_name, PATH_MAX - 1);
159                 p = dirname(tempname);
160                 if (stat(p, &st)) {
161                         perror("disk util stat");
162                         return -1;
163                 }
164
165                 majdev = major(st.st_dev);
166                 mindev = minor(st.st_dev);
167         }
168
169         *min = mindev;
170         *maj = majdev;
171
172         return 0;
173 }
174
/*
 * Parse a "major:minor" pair from the first line of the file at path.
 *
 * Returns 0 and stores both numbers through maj/min on success. Returns 1
 * if the file cannot be opened (a message is printed via perror), is empty,
 * or its first line does not start with two colon-separated numbers; in
 * that case maj/min are left untouched.
 */
static int read_block_dev_entry(char *path, int *maj, int *min)
{
	char line[256], *p;
	unsigned int dev_major, dev_minor;
	FILE *f;

	f = fopen(path, "r");
	if (!f) {
		perror("open path");
		return 1;
	}

	p = fgets(line, sizeof(line), f);
	fclose(f);

	if (!p)
		return 1;

	/*
	 * %u requires unsigned int targets; scan into matching temporaries
	 * instead of writing through the callers' int pointers.
	 */
	if (sscanf(p, "%u:%u", &dev_major, &dev_minor) != 2)
		return 1;

	*maj = (int) dev_major;
	*min = (int) dev_minor;
	return 0;
}
197
198 static void find_add_disk_slaves(struct thread_data *td, char *path,
199                                  struct disk_util *masterdu)
200 {
201         DIR *dirhandle = NULL;
202         struct dirent *dirent = NULL;
203         char slavesdir[PATH_MAX], temppath[PATH_MAX], slavepath[PATH_MAX];
204         struct disk_util *slavedu = NULL;
205         int majdev, mindev;
206         ssize_t linklen;
207
208         sprintf(slavesdir, "%s/%s", path, "slaves");
209         dirhandle = opendir(slavesdir);
210         if (!dirhandle)
211                 return;
212
213         while ((dirent = readdir(dirhandle)) != NULL) {
214                 if (!strcmp(dirent->d_name, ".") ||
215                     !strcmp(dirent->d_name, ".."))
216                         continue;
217
218                 sprintf(temppath, "%s/%s", slavesdir, dirent->d_name);
219                 /* Can we always assume that the slaves device entries
220                  * are links to the real directories for the slave
221                  * devices?
222                  */
223                 linklen = readlink(temppath, slavepath, PATH_MAX - 0);
224                 if (linklen  < 0) {
225                         perror("readlink() for slave device.");
226                         return;
227                 }
228                 slavepath[linklen] = '\0';
229
230                 sprintf(temppath, "%s/%s/dev", slavesdir, slavepath);
231                 if (read_block_dev_entry(temppath, &majdev, &mindev)) {
232                         perror("Error getting slave device numbers.");
233                         return;
234                 }
235
236                 /*
237                  * See if this maj,min already exists
238                  */
239                 slavedu = disk_util_exists(majdev, mindev);
240                 if (slavedu)
241                         continue;
242
243                 sprintf(temppath, "%s/%s", slavesdir, slavepath);
244                 __init_per_file_disk_util(td, majdev, mindev, temppath);
245                 slavedu = disk_util_exists(majdev, mindev);
246
247                 /* Should probably use an assert here. slavedu should
248                  * always be present at this point. */
249                 if (slavedu) {
250                         slavedu->users++;
251                         flist_add_tail(&slavedu->slavelist, &masterdu->slaves);
252                 }
253         }
254
255         closedir(dirhandle);
256 }
257
258 static struct disk_util *disk_util_add(struct thread_data *td, int majdev,
259                                        int mindev, char *path)
260 {
261         struct disk_util *du, *__du;
262         struct flist_head *entry;
263
264         dprint(FD_DISKUTIL, "add maj/min %d/%d: %s\n", majdev, mindev, path);
265
266         du = smalloc(sizeof(*du));
267         memset(du, 0, sizeof(*du));
268         INIT_FLIST_HEAD(&du->list);
269         sprintf(du->path, "%s/stat", path);
270         strncpy((char *) du->dus.name, basename(path), FIO_DU_NAME_SZ);
271         du->sysfs_root = path;
272         du->major = majdev;
273         du->minor = mindev;
274         INIT_FLIST_HEAD(&du->slavelist);
275         INIT_FLIST_HEAD(&du->slaves);
276         du->lock = fio_mutex_init(1);
277         du->users = 0;
278
279         flist_for_each(entry, &disk_list) {
280                 __du = flist_entry(entry, struct disk_util, list);
281
282                 dprint(FD_DISKUTIL, "found %s in list\n", __du->dus.name);
283
284                 if (!strcmp((char *) du->dus.name, (char *) __du->dus.name)) {
285                         disk_util_free(du);
286                         return __du;
287                 }
288         }
289
290         dprint(FD_DISKUTIL, "add %s to list\n", du->dus.name);
291
292         fio_gettime(&du->time, NULL);
293         get_io_ticks(du, &du->last_dus);
294
295         flist_add_tail(&du->list, &disk_list);
296         find_add_disk_slaves(td, path, du);
297         return du;
298 }
299
/*
 * Compare majdev/mindev with the "major:minor" pair stored in the file at
 * path.
 *
 * Returns 0 when the file could be read and both numbers match, 1 otherwise.
 */
static int check_dev_match(int majdev, int mindev, char *path)
{
	int file_maj, file_min;

	if (read_block_dev_entry(path, &file_maj, &file_min))
		return 1;

	return !(file_maj == majdev && file_min == mindev);
}
312
313 static int find_block_dir(int majdev, int mindev, char *path, int link_ok)
314 {
315         struct dirent *dir;
316         struct stat st;
317         int found = 0;
318         DIR *D;
319
320         D = opendir(path);
321         if (!D)
322                 return 0;
323
324         while ((dir = readdir(D)) != NULL) {
325                 char full_path[256];
326
327                 if (!strcmp(dir->d_name, ".") || !strcmp(dir->d_name, ".."))
328                         continue;
329
330                 sprintf(full_path, "%s/%s", path, dir->d_name);
331
332                 if (!strcmp(dir->d_name, "dev")) {
333                         if (!check_dev_match(majdev, mindev, full_path)) {
334                                 found = 1;
335                                 break;
336                         }
337                 }
338
339                 if (link_ok) {
340                         if (stat(full_path, &st) == -1) {
341                                 perror("stat");
342                                 break;
343                         }
344                 } else {
345                         if (lstat(full_path, &st) == -1) {
346                                 perror("stat");
347                                 break;
348                         }
349                 }
350
351                 if (!S_ISDIR(st.st_mode) || S_ISLNK(st.st_mode))
352                         continue;
353
354                 found = find_block_dir(majdev, mindev, full_path, 0);
355                 if (found) {
356                         strcpy(path, full_path);
357                         break;
358                 }
359         }
360
361         closedir(D);
362         return found;
363 }
364
365 static struct disk_util *__init_per_file_disk_util(struct thread_data *td,
366                                                    int majdev, int mindev,
367                                                    char *path)
368 {
369         struct stat st;
370         char tmp[PATH_MAX];
371         char *p;
372
373         /*
374          * If there's a ../queue/ directory there, we are inside a partition.
375          * Check if that is the case and jump back. For loop/md/dm etc we
376          * are already in the right spot.
377          */
378         sprintf(tmp, "%s/../queue", path);
379         if (!stat(tmp, &st)) {
380                 p = dirname(path);
381                 sprintf(tmp, "%s/queue", p);
382                 if (stat(tmp, &st)) {
383                         log_err("unknown sysfs layout\n");
384                         return NULL;
385                 }
386                 strncpy(tmp, p, PATH_MAX - 1);
387                 sprintf(path, "%s", tmp);
388         }
389
390         if (td->o.ioscheduler && !td->sysfs_root)
391                 td->sysfs_root = strdup(path);
392
393         return disk_util_add(td, majdev, mindev, path);
394 }
395
396 static struct disk_util *init_per_file_disk_util(struct thread_data *td,
397                                                  char *filename)
398 {
399
400         char foo[PATH_MAX];
401         struct disk_util *du;
402         int mindev, majdev;
403
404         if (get_device_numbers(filename, &majdev, &mindev))
405                 return NULL;
406
407         dprint(FD_DISKUTIL, "%s belongs to maj/min %d/%d\n", filename, majdev,
408                         mindev);
409
410         du = disk_util_exists(majdev, mindev);
411         if (du) {
412                 if (td->o.ioscheduler && !td->sysfs_root)
413                         td->sysfs_root = strdup(du->sysfs_root);
414
415                 return du;
416         }
417
418         /*
419          * for an fs without a device, we will repeatedly stat through
420          * sysfs which can take oodles of time for thousands of files. so
421          * cache the last lookup and compare with that before going through
422          * everything again.
423          */
424         if (mindev == last_mindev && majdev == last_majdev)
425                 return last_du;
426
427         last_mindev = mindev;
428         last_majdev = majdev;
429
430         sprintf(foo, "/sys/block");
431         if (!find_block_dir(majdev, mindev, foo, 1))
432                 return NULL;
433
434         return __init_per_file_disk_util(td, majdev, mindev, foo);
435 }
436
437 static struct disk_util *__init_disk_util(struct thread_data *td,
438                                           struct fio_file *f)
439 {
440         return init_per_file_disk_util(td, f->file_name);
441 }
442
443 void init_disk_util(struct thread_data *td)
444 {
445         struct fio_file *f;
446         unsigned int i;
447
448         if (!td->o.do_disk_util ||
449             (td->io_ops->flags & (FIO_DISKLESSIO | FIO_NODISKUTIL)))
450                 return;
451
452         for_each_file(td, f, i)
453                 f->du = __init_disk_util(td, f);
454 }
455
456 static void show_agg_stats(struct disk_util_agg *agg, int terse)
457 {
458         if (!agg->slavecount)
459                 return;
460
461         if (!terse) {
462                 log_info(", aggrios=%u/%u, aggrmerge=%u/%u, aggrticks=%u/%u,"
463                                 " aggrin_queue=%u, aggrutil=%3.2f%%",
464                                 agg->ios[0] / agg->slavecount,
465                                 agg->ios[1] / agg->slavecount,
466                                 agg->merges[0] / agg->slavecount,
467                                 agg->merges[1] / agg->slavecount,
468                                 agg->ticks[0] / agg->slavecount,
469                                 agg->ticks[1] / agg->slavecount,
470                                 agg->time_in_queue / agg->slavecount,
471                                 agg->max_util.u.f);
472         } else {
473                 log_info(";slaves;%u;%u;%u;%u;%u;%u;%u;%3.2f%%",
474                                 agg->ios[0] / agg->slavecount,
475                                 agg->ios[1] / agg->slavecount,
476                                 agg->merges[0] / agg->slavecount,
477                                 agg->merges[1] / agg->slavecount,
478                                 agg->ticks[0] / agg->slavecount,
479                                 agg->ticks[1] / agg->slavecount,
480                                 agg->time_in_queue / agg->slavecount,
481                                 agg->max_util.u.f);
482         }
483 }
484
485 static void aggregate_slaves_stats(struct disk_util *masterdu)
486 {
487         struct disk_util_agg *agg = &masterdu->agg;
488         struct disk_util_stat *dus;
489         struct flist_head *entry;
490         struct disk_util *slavedu;
491         double util;
492
493         flist_for_each(entry, &masterdu->slaves) {
494                 slavedu = flist_entry(entry, struct disk_util, slavelist);
495                 dus = &slavedu->dus;
496                 agg->ios[0] += dus->ios[0];
497                 agg->ios[1] += dus->ios[1];
498                 agg->merges[0] += dus->merges[0];
499                 agg->merges[1] += dus->merges[1];
500                 agg->sectors[0] += dus->sectors[0];
501                 agg->sectors[1] += dus->sectors[1];
502                 agg->ticks[0] += dus->ticks[0];
503                 agg->ticks[1] += dus->ticks[1];
504                 agg->time_in_queue += dus->time_in_queue;
505                 agg->slavecount++;
506
507                 util = (double) (100 * dus->io_ticks / (double) slavedu->dus.msec);
508                 /* System utilization is the utilization of the
509                  * component with the highest utilization.
510                  */
511                 if (util > agg->max_util.u.f)
512                         agg->max_util.u.f = util;
513
514         }
515
516         if (agg->max_util.u.f > 100.0)
517                 agg->max_util.u.f = 100.0;
518 }
519
520 void free_disk_util(void)
521 {
522         struct disk_util *du;
523
524         while (!flist_empty(&disk_list)) {
525                 du = flist_entry(disk_list.next, struct disk_util, list);
526                 flist_del(&du->list);
527                 disk_util_free(du);
528         }
529
530         last_majdev = last_mindev = -1;
531 }
532
533 void print_disk_util(struct disk_util_stat *dus, struct disk_util_agg *agg,
534                      int terse)
535 {
536         double util = 0;
537
538         if (dus->msec)
539                 util = (double) 100 * dus->io_ticks / (double) dus->msec;
540         if (util > 100.0)
541                 util = 100.0;
542
543         if (!terse) {
544                 if (agg->slavecount)
545                         log_info("  ");
546
547                 log_info("  %s: ios=%u/%u, merge=%u/%u, ticks=%u/%u, "
548                          "in_queue=%u, util=%3.2f%%", dus->name,
549                                         dus->ios[0], dus->ios[1],
550                                         dus->merges[0], dus->merges[1],
551                                         dus->ticks[0], dus->ticks[1],
552                                         dus->time_in_queue, util);
553         } else {
554                 log_info(";%s;%u;%u;%u;%u;%u;%u;%u;%3.2f%%",
555                                         dus->name, dus->ios[0], dus->ios[1],
556                                         dus->merges[0], dus->merges[1],
557                                         dus->ticks[0], dus->ticks[1],
558                                         dus->time_in_queue, util);
559         }
560
561         /*
562          * If the device has slaves, aggregate the stats for
563          * those slave devices also.
564          */
565         show_agg_stats(agg, terse);
566
567         if (!terse)
568                 log_info("\n");
569 }
570
571 void show_disk_util(int terse)
572 {
573         struct flist_head *entry;
574         struct disk_util *du;
575
576         if (flist_empty(&disk_list))
577                 return;
578
579         if (!terse)
580                 log_info("\nDisk stats (read/write):\n");
581
582         flist_for_each(entry, &disk_list) {
583                 du = flist_entry(entry, struct disk_util, list);
584
585                 aggregate_slaves_stats(du);
586                 print_disk_util(&du->dus, &du->agg, terse);
587         }
588 }