Merge branch 'master' into gfio
[fio.git] / diskutil.c
1 #include <stdio.h>
2 #include <string.h>
3 #include <sys/time.h>
4 #include <sys/types.h>
5 #include <sys/stat.h>
6 #include <dirent.h>
7 #include <libgen.h>
8 #include <math.h>
9
10 #include "fio.h"
11 #include "smalloc.h"
12 #include "diskutil.h"
13
/*
 * Device numbers of the most recent sysfs lookup started by
 * init_per_file_disk_util(), and the entry returned when the same
 * numbers are asked for again. last_du is reset by disk_util_free().
 */
static int last_majdev;
static int last_mindev;
static struct disk_util *last_du;

/*
 * Held while disk_list is walked or modified in update_io_ticks(),
 * disk_util_add(), free_disk_util() and show_disk_util().
 */
static struct fio_mutex *disk_util_mutex;

/* Set by free_disk_util(); update_io_ticks() returns 1 once it is set. */
static int disk_util_exit;

/* Every tracked disk, linked through disk_util.list. */
FLIST_HEAD(disk_list);

static struct disk_util *__init_per_file_disk_util(struct thread_data *td,
                                                   int majdev, int mindev,
                                                   char *path);
24
25 static void disk_util_free(struct disk_util *du)
26 {
27         if (du == last_du)
28                 last_du = NULL;
29
30         while (!flist_empty(&du->slaves)) {
31                 struct disk_util *slave;
32
33                 slave = flist_entry(du->slaves.next, struct disk_util, slavelist);
34                 flist_del(&slave->slavelist);
35                 slave->users--;
36         }
37
38         fio_mutex_remove(du->lock);
39         sfree(du);
40 }
41
42 static int get_io_ticks(struct disk_util *du, struct disk_util_stat *dus)
43 {
44         unsigned in_flight;
45         unsigned long long sectors[2];
46         char line[256];
47         FILE *f;
48         char *p;
49         int ret;
50
51         dprint(FD_DISKUTIL, "open stat file: %s\n", du->path);
52
53         f = fopen(du->path, "r");
54         if (!f)
55                 return 1;
56
57         p = fgets(line, sizeof(line), f);
58         if (!p) {
59                 fclose(f);
60                 return 1;
61         }
62
63         dprint(FD_DISKUTIL, "%s: %s", du->path, p);
64
65         ret = sscanf(p, "%u %u %llu %u %u %u %llu %u %u %u %u\n", &dus->ios[0],
66                                         &dus->merges[0], &sectors[0],
67                                         &dus->ticks[0], &dus->ios[1],
68                                         &dus->merges[1], &sectors[1],
69                                         &dus->ticks[1], &in_flight,
70                                         &dus->io_ticks, &dus->time_in_queue);
71         fclose(f);
72         dprint(FD_DISKUTIL, "%s: stat read ok? %d\n", du->path, ret == 1);
73         dus->sectors[0] = sectors[0];
74         dus->sectors[1] = sectors[1];
75         return ret != 11;
76 }
77
78 static void update_io_tick_disk(struct disk_util *du)
79 {
80         struct disk_util_stat __dus, *dus, *ldus;
81         struct timeval t;
82
83         if (!du->users)
84                 return;
85         if (get_io_ticks(du, &__dus))
86                 return;
87
88         dus = &du->dus;
89         ldus = &du->last_dus;
90
91         dus->sectors[0] += (__dus.sectors[0] - ldus->sectors[0]);
92         dus->sectors[1] += (__dus.sectors[1] - ldus->sectors[1]);
93         dus->ios[0] += (__dus.ios[0] - ldus->ios[0]);
94         dus->ios[1] += (__dus.ios[1] - ldus->ios[1]);
95         dus->merges[0] += (__dus.merges[0] - ldus->merges[0]);
96         dus->merges[1] += (__dus.merges[1] - ldus->merges[1]);
97         dus->ticks[0] += (__dus.ticks[0] - ldus->ticks[0]);
98         dus->ticks[1] += (__dus.ticks[1] - ldus->ticks[1]);
99         dus->io_ticks += (__dus.io_ticks - ldus->io_ticks);
100         dus->time_in_queue += (__dus.time_in_queue - ldus->time_in_queue);
101
102         fio_gettime(&t, NULL);
103         dus->msec += mtime_since(&du->time, &t);
104         memcpy(&du->time, &t, sizeof(t));
105         memcpy(ldus, &__dus, sizeof(__dus));
106 }
107
108 int update_io_ticks(void)
109 {
110         struct flist_head *entry;
111         struct disk_util *du;
112         int ret = 0;
113
114         dprint(FD_DISKUTIL, "update io ticks\n");
115
116         fio_mutex_down(disk_util_mutex);
117
118         if (!disk_util_exit) {
119                 flist_for_each(entry, &disk_list) {
120                         du = flist_entry(entry, struct disk_util, list);
121                         update_io_tick_disk(du);
122                 }
123         } else
124                 ret = 1;
125
126         fio_mutex_up(disk_util_mutex);
127         return ret;
128 }
129
130 static struct disk_util *disk_util_exists(int major, int minor)
131 {
132         struct flist_head *entry;
133         struct disk_util *du;
134
135         flist_for_each(entry, &disk_list) {
136                 du = flist_entry(entry, struct disk_util, list);
137
138                 if (major == du->major && minor == du->minor)
139                         return du;
140         }
141
142         return NULL;
143 }
144
145 static int get_device_numbers(char *file_name, int *maj, int *min)
146 {
147         struct stat st;
148         int majdev, mindev;
149         char tempname[PATH_MAX], *p;
150
151         if (!lstat(file_name, &st)) {
152                 if (S_ISBLK(st.st_mode)) {
153                         majdev = major(st.st_rdev);
154                         mindev = minor(st.st_rdev);
155                 } else if (S_ISCHR(st.st_mode)) {
156                         majdev = major(st.st_rdev);
157                         mindev = minor(st.st_rdev);
158                         if (fio_lookup_raw(st.st_rdev, &majdev, &mindev))
159                                 return -1;
160                 } else if (S_ISFIFO(st.st_mode))
161                         return -1;
162                 else {
163                         majdev = major(st.st_dev);
164                         mindev = minor(st.st_dev);
165                 }
166         } else {
167                 /*
168                  * must be a file, open "." in that path
169                  */
170                 strncpy(tempname, file_name, PATH_MAX - 1);
171                 p = dirname(tempname);
172                 if (stat(p, &st)) {
173                         perror("disk util stat");
174                         return -1;
175                 }
176
177                 majdev = major(st.st_dev);
178                 mindev = minor(st.st_dev);
179         }
180
181         *min = mindev;
182         *maj = majdev;
183
184         return 0;
185 }
186
/*
 * Parse a "major:minor" pair from the first line of the file at path.
 *
 * Returns 0 and fills *maj / *min on success. Returns 1 when the file
 * cannot be opened (after reporting through perror()), is empty, or its
 * first line does not start with two colon-separated integers.
 */
static int read_block_dev_entry(char *path, int *maj, int *min)
{
        char line[256], *p;
        FILE *f;

        f = fopen(path, "r");
        if (!f) {
                perror("open path");
                return 1;
        }

        p = fgets(line, sizeof(line), f);
        fclose(f);

        if (!p)
                return 1;

        /* %d, not %u: the destinations are plain int. */
        if (sscanf(p, "%d:%d", maj, min) != 2)
                return 1;

        return 0;
}
209
210 static void find_add_disk_slaves(struct thread_data *td, char *path,
211                                  struct disk_util *masterdu)
212 {
213         DIR *dirhandle = NULL;
214         struct dirent *dirent = NULL;
215         char slavesdir[PATH_MAX], temppath[PATH_MAX], slavepath[PATH_MAX];
216         struct disk_util *slavedu = NULL;
217         int majdev, mindev;
218         ssize_t linklen;
219
220         sprintf(slavesdir, "%s/%s", path, "slaves");
221         dirhandle = opendir(slavesdir);
222         if (!dirhandle)
223                 return;
224
225         while ((dirent = readdir(dirhandle)) != NULL) {
226                 if (!strcmp(dirent->d_name, ".") ||
227                     !strcmp(dirent->d_name, ".."))
228                         continue;
229
230                 sprintf(temppath, "%s%s%s", slavesdir, FIO_OS_PATH_SEPARATOR, dirent->d_name);
231                 /* Can we always assume that the slaves device entries
232                  * are links to the real directories for the slave
233                  * devices?
234                  */
235                 linklen = readlink(temppath, slavepath, PATH_MAX - 0);
236                 if (linklen  < 0) {
237                         perror("readlink() for slave device.");
238                         return;
239                 }
240                 slavepath[linklen] = '\0';
241
242                 sprintf(temppath, "%s/%s/dev", slavesdir, slavepath);
243                 if (read_block_dev_entry(temppath, &majdev, &mindev)) {
244                         perror("Error getting slave device numbers.");
245                         return;
246                 }
247
248                 /*
249                  * See if this maj,min already exists
250                  */
251                 slavedu = disk_util_exists(majdev, mindev);
252                 if (slavedu)
253                         continue;
254
255                 sprintf(temppath, "%s%s%s", slavesdir, FIO_OS_PATH_SEPARATOR, slavepath);
256                 __init_per_file_disk_util(td, majdev, mindev, temppath);
257                 slavedu = disk_util_exists(majdev, mindev);
258
259                 /* Should probably use an assert here. slavedu should
260                  * always be present at this point. */
261                 if (slavedu) {
262                         slavedu->users++;
263                         flist_add_tail(&slavedu->slavelist, &masterdu->slaves);
264                 }
265         }
266
267         closedir(dirhandle);
268 }
269
270 static struct disk_util *disk_util_add(struct thread_data *td, int majdev,
271                                        int mindev, char *path)
272 {
273         struct disk_util *du, *__du;
274         struct flist_head *entry;
275
276         dprint(FD_DISKUTIL, "add maj/min %d/%d: %s\n", majdev, mindev, path);
277
278         du = smalloc(sizeof(*du));
279         memset(du, 0, sizeof(*du));
280         INIT_FLIST_HEAD(&du->list);
281         sprintf(du->path, "%s/stat", path);
282         strncpy((char *) du->dus.name, basename(path), FIO_DU_NAME_SZ);
283         du->sysfs_root = path;
284         du->major = majdev;
285         du->minor = mindev;
286         INIT_FLIST_HEAD(&du->slavelist);
287         INIT_FLIST_HEAD(&du->slaves);
288         du->lock = fio_mutex_init(1);
289         du->users = 0;
290
291         fio_mutex_down(disk_util_mutex);
292
293         flist_for_each(entry, &disk_list) {
294                 __du = flist_entry(entry, struct disk_util, list);
295
296                 dprint(FD_DISKUTIL, "found %s in list\n", __du->dus.name);
297
298                 if (!strcmp((char *) du->dus.name, (char *) __du->dus.name)) {
299                         disk_util_free(du);
300                         fio_mutex_up(disk_util_mutex);
301                         return __du;
302                 }
303         }
304
305         dprint(FD_DISKUTIL, "add %s to list\n", du->dus.name);
306
307         fio_gettime(&du->time, NULL);
308         get_io_ticks(du, &du->last_dus);
309
310         flist_add_tail(&du->list, &disk_list);
311         find_add_disk_slaves(td, path, du);
312         fio_mutex_up(disk_util_mutex);
313         return du;
314 }
315
/*
 * Compare the device numbers stored in the sysfs "dev" file at path
 * with majdev/mindev.
 *
 * Returns 0 when both numbers match, 1 when they differ or the file
 * could not be read.
 */
static int check_dev_match(int majdev, int mindev, char *path)
{
        int found_maj, found_min;

        if (read_block_dev_entry(path, &found_maj, &found_min))
                return 1;

        return !(found_maj == majdev && found_min == mindev);
}
328
329 static int find_block_dir(int majdev, int mindev, char *path, int link_ok)
330 {
331         struct dirent *dir;
332         struct stat st;
333         int found = 0;
334         DIR *D;
335
336         D = opendir(path);
337         if (!D)
338                 return 0;
339
340         while ((dir = readdir(D)) != NULL) {
341                 char full_path[256];
342
343                 if (!strcmp(dir->d_name, ".") || !strcmp(dir->d_name, ".."))
344                         continue;
345
346                 sprintf(full_path, "%s%s%s", path, FIO_OS_PATH_SEPARATOR, dir->d_name);
347
348                 if (!strcmp(dir->d_name, "dev")) {
349                         if (!check_dev_match(majdev, mindev, full_path)) {
350                                 found = 1;
351                                 break;
352                         }
353                 }
354
355                 if (link_ok) {
356                         if (stat(full_path, &st) == -1) {
357                                 perror("stat");
358                                 break;
359                         }
360                 } else {
361                         if (lstat(full_path, &st) == -1) {
362                                 perror("stat");
363                                 break;
364                         }
365                 }
366
367                 if (!S_ISDIR(st.st_mode) || S_ISLNK(st.st_mode))
368                         continue;
369
370                 found = find_block_dir(majdev, mindev, full_path, 0);
371                 if (found) {
372                         strcpy(path, full_path);
373                         break;
374                 }
375         }
376
377         closedir(D);
378         return found;
379 }
380
381 static struct disk_util *__init_per_file_disk_util(struct thread_data *td,
382                                                    int majdev, int mindev,
383                                                    char *path)
384 {
385         struct stat st;
386         char tmp[PATH_MAX];
387         char *p;
388
389         /*
390          * If there's a ../queue/ directory there, we are inside a partition.
391          * Check if that is the case and jump back. For loop/md/dm etc we
392          * are already in the right spot.
393          */
394         sprintf(tmp, "%s/../queue", path);
395         if (!stat(tmp, &st)) {
396                 p = dirname(path);
397                 sprintf(tmp, "%s/queue", p);
398                 if (stat(tmp, &st)) {
399                         log_err("unknown sysfs layout\n");
400                         return NULL;
401                 }
402                 strncpy(tmp, p, PATH_MAX - 1);
403                 sprintf(path, "%s", tmp);
404         }
405
406         if (td->o.ioscheduler && !td->sysfs_root)
407                 td->sysfs_root = strdup(path);
408
409         return disk_util_add(td, majdev, mindev, path);
410 }
411
412 static struct disk_util *init_per_file_disk_util(struct thread_data *td,
413                                                  char *filename)
414 {
415
416         char foo[PATH_MAX];
417         struct disk_util *du;
418         int mindev, majdev;
419
420         if (get_device_numbers(filename, &majdev, &mindev))
421                 return NULL;
422
423         dprint(FD_DISKUTIL, "%s belongs to maj/min %d/%d\n", filename, majdev,
424                         mindev);
425
426         du = disk_util_exists(majdev, mindev);
427         if (du) {
428                 if (td->o.ioscheduler && !td->sysfs_root)
429                         td->sysfs_root = strdup(du->sysfs_root);
430
431                 return du;
432         }
433
434         /*
435          * for an fs without a device, we will repeatedly stat through
436          * sysfs which can take oodles of time for thousands of files. so
437          * cache the last lookup and compare with that before going through
438          * everything again.
439          */
440         if (mindev == last_mindev && majdev == last_majdev)
441                 return last_du;
442
443         last_mindev = mindev;
444         last_majdev = majdev;
445
446         sprintf(foo, "/sys/block");
447         if (!find_block_dir(majdev, mindev, foo, 1))
448                 return NULL;
449
450         return __init_per_file_disk_util(td, majdev, mindev, foo);
451 }
452
453 static struct disk_util *__init_disk_util(struct thread_data *td,
454                                           struct fio_file *f)
455 {
456         return init_per_file_disk_util(td, f->file_name);
457 }
458
459 void init_disk_util(struct thread_data *td)
460 {
461         struct fio_file *f;
462         unsigned int i;
463
464         if (!td->o.do_disk_util ||
465             (td->io_ops->flags & (FIO_DISKLESSIO | FIO_NODISKUTIL)))
466                 return;
467
468         for_each_file(td, f, i)
469                 f->du = __init_disk_util(td, f);
470 }
471
472 static void show_agg_stats(struct disk_util_agg *agg, int terse)
473 {
474         if (!agg->slavecount)
475                 return;
476
477         if (!terse) {
478                 log_info(", aggrios=%u/%u, aggrmerge=%u/%u, aggrticks=%u/%u,"
479                                 " aggrin_queue=%u, aggrutil=%3.2f%%",
480                                 agg->ios[0] / agg->slavecount,
481                                 agg->ios[1] / agg->slavecount,
482                                 agg->merges[0] / agg->slavecount,
483                                 agg->merges[1] / agg->slavecount,
484                                 agg->ticks[0] / agg->slavecount,
485                                 agg->ticks[1] / agg->slavecount,
486                                 agg->time_in_queue / agg->slavecount,
487                                 agg->max_util.u.f);
488         } else {
489                 log_info(";slaves;%u;%u;%u;%u;%u;%u;%u;%3.2f%%",
490                                 agg->ios[0] / agg->slavecount,
491                                 agg->ios[1] / agg->slavecount,
492                                 agg->merges[0] / agg->slavecount,
493                                 agg->merges[1] / agg->slavecount,
494                                 agg->ticks[0] / agg->slavecount,
495                                 agg->ticks[1] / agg->slavecount,
496                                 agg->time_in_queue / agg->slavecount,
497                                 agg->max_util.u.f);
498         }
499 }
500
501 static void aggregate_slaves_stats(struct disk_util *masterdu)
502 {
503         struct disk_util_agg *agg = &masterdu->agg;
504         struct disk_util_stat *dus;
505         struct flist_head *entry;
506         struct disk_util *slavedu;
507         double util;
508
509         flist_for_each(entry, &masterdu->slaves) {
510                 slavedu = flist_entry(entry, struct disk_util, slavelist);
511                 dus = &slavedu->dus;
512                 agg->ios[0] += dus->ios[0];
513                 agg->ios[1] += dus->ios[1];
514                 agg->merges[0] += dus->merges[0];
515                 agg->merges[1] += dus->merges[1];
516                 agg->sectors[0] += dus->sectors[0];
517                 agg->sectors[1] += dus->sectors[1];
518                 agg->ticks[0] += dus->ticks[0];
519                 agg->ticks[1] += dus->ticks[1];
520                 agg->time_in_queue += dus->time_in_queue;
521                 agg->slavecount++;
522
523                 util = (double) (100 * dus->io_ticks / (double) slavedu->dus.msec);
524                 /* System utilization is the utilization of the
525                  * component with the highest utilization.
526                  */
527                 if (util > agg->max_util.u.f)
528                         agg->max_util.u.f = util;
529
530         }
531
532         if (agg->max_util.u.f > 100.0)
533                 agg->max_util.u.f = 100.0;
534 }
535
536 void free_disk_util(void)
537 {
538         struct disk_util *du;
539
540         disk_util_exit = 1;
541         wait_for_disk_thread_exit();
542
543         fio_mutex_down(disk_util_mutex);
544
545         while (!flist_empty(&disk_list)) {
546                 du = flist_entry(disk_list.next, struct disk_util, list);
547                 flist_del(&du->list);
548                 disk_util_free(du);
549         }
550
551         last_majdev = last_mindev = -1;
552         fio_mutex_up(disk_util_mutex);
553         fio_mutex_remove(disk_util_mutex);
554 }
555
556 void print_disk_util(struct disk_util_stat *dus, struct disk_util_agg *agg,
557                      int terse)
558 {
559         double util = 0;
560
561         if (dus->msec)
562                 util = (double) 100 * dus->io_ticks / (double) dus->msec;
563         if (util > 100.0)
564                 util = 100.0;
565
566         if (!terse) {
567                 if (agg->slavecount)
568                         log_info("  ");
569
570                 log_info("  %s: ios=%u/%u, merge=%u/%u, ticks=%u/%u, "
571                          "in_queue=%u, util=%3.2f%%", dus->name,
572                                         dus->ios[0], dus->ios[1],
573                                         dus->merges[0], dus->merges[1],
574                                         dus->ticks[0], dus->ticks[1],
575                                         dus->time_in_queue, util);
576         } else {
577                 log_info(";%s;%u;%u;%u;%u;%u;%u;%u;%3.2f%%",
578                                         dus->name, dus->ios[0], dus->ios[1],
579                                         dus->merges[0], dus->merges[1],
580                                         dus->ticks[0], dus->ticks[1],
581                                         dus->time_in_queue, util);
582         }
583
584         /*
585          * If the device has slaves, aggregate the stats for
586          * those slave devices also.
587          */
588         show_agg_stats(agg, terse);
589
590         if (!terse)
591                 log_info("\n");
592 }
593
594 void show_disk_util(int terse)
595 {
596         struct flist_head *entry;
597         struct disk_util *du;
598
599         fio_mutex_down(disk_util_mutex);
600
601         if (flist_empty(&disk_list)) {
602                 fio_mutex_up(disk_util_mutex);
603                 return;
604         }
605
606         if (!terse)
607                 log_info("\nDisk stats (read/write):\n");
608
609         flist_for_each(entry, &disk_list) {
610                 du = flist_entry(entry, struct disk_util, list);
611
612                 aggregate_slaves_stats(du);
613                 print_disk_util(&du->dus, &du->agg, terse);
614         }
615
616         fio_mutex_up(disk_util_mutex);
617 }
618
619 void setup_disk_util(void)
620 {
621         disk_util_mutex = fio_mutex_init(1);
622 }