[PATCH] blktrace: better check for mounted relayfs
[blktrace.git] / blktrace.c
1 /*
2  * block queue tracing application
3  *
4  * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5  *
6  *  This program is free software; you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License as published by
8  *  the Free Software Foundation; either version 2 of the License, or
9  *  (at your option) any later version.
10  *
11  *  This program is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU General Public License for more details.
15  *
16  *  You should have received a copy of the GNU General Public License
17  *  along with this program; if not, write to the Free Software
18  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19  *
20  */
21 #include <pthread.h>
22 #include <sys/types.h>
23 #include <sys/stat.h>
24 #include <unistd.h>
25 #include <locale.h>
26 #include <signal.h>
27 #include <fcntl.h>
28 #include <string.h>
29 #include <sys/ioctl.h>
30 #include <sys/param.h>
31 #include <sys/statfs.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <sched.h>
35 #include <ctype.h>
36 #include <getopt.h>
37
38 #include "blktrace.h"
39
/* Value handed to the kernel as buf_size when a trace is set up. */
#define BUF_SIZE        (128 * 1024)
/* Value handed to the kernel as buf_nr when a trace is set up. */
#define BUF_NR          (4)

/* f_type that statfs() must report for the relay mount point. */
#define RELAYFS_TYPE    0xF0B4A981

/* Expands to one struct mask_map initializer: value, name, "BLK_TC_" name. */
#define DECLARE_MASK_MAP(mask)          { BLK_TC_##mask, #mask, "BLK_TC_"#mask }

/* Non-zero when str matches either name of mmp, ignoring case. */
#define COMPARE_MASK_MAP(mmp, str)                                      \
        (strcasecmp((mmp)->short_form, (str)) == 0 ||                  \
         strcasecmp((mmp)->long_form, (str)) == 0)

/* Non-zero when x lies in the range [1, (1 << BLK_TC_SHIFT) - 1]. */
#define VALID_SET(x)    (((x) >= 1) && ((x) < (1 << BLK_TC_SHIFT)))
51
/*
 * One selectable trace category: the mask value plus the two spellings
 * accepted for it on the command line.  The names are only ever
 * initialised from string literals and read, hence const.
 */
struct mask_map {
        int mask;                       /* BLK_TC_* value */
        const char *short_form;         /* e.g. "READ" */
        const char *long_form;          /* e.g. "BLK_TC_READ" */
};
57
58 struct mask_map mask_maps[] = {
59         DECLARE_MASK_MAP(READ),
60         DECLARE_MASK_MAP(WRITE),
61         DECLARE_MASK_MAP(BARRIER),
62         DECLARE_MASK_MAP(SYNC),
63         DECLARE_MASK_MAP(QUEUE),
64         DECLARE_MASK_MAP(REQUEUE),
65         DECLARE_MASK_MAP(ISSUE),
66         DECLARE_MASK_MAP(COMPLETE),
67         DECLARE_MASK_MAP(FS),
68         DECLARE_MASK_MAP(PC),
69 };
70
/* Short option string; must stay in step with l_opts below. */
#define S_OPTS  "d:a:A:r:o:kw:"

/*
 * Long option table for getopt_long().  Fields are, in order:
 * name, has_arg, flag, val.  Terminated by an all-zero entry.
 */
static struct option l_opts[] = {
        { "dev",        required_argument,      NULL,   'd' },
        { "act-mask",   required_argument,      NULL,   'a' },
        { "set-mask",   required_argument,      NULL,   'A' },
        { "relay",      required_argument,      NULL,   'r' },
        { "output",     required_argument,      NULL,   'o' },
        { "kill",       no_argument,            NULL,   'k' },
        { "stopwatch",  required_argument,      NULL,   'w' },
        { NULL,         no_argument,            NULL,   0   }
};
122
struct device_information;

/*
 * State of one reader thread.  There is one such thread per CPU for
 * every traced device.
 */
struct thread_information {
        int cpu;                        /* CPU index this reader serves */
        pthread_t thread;               /* handle used to join the reader */

        int fd;                         /* relay file named by fn */
        char fn[MAXPATHLEN + 64];       /* "<relay>/block/<name>/trace<cpu>" */

        pthread_mutex_t *fd_lock;       /* guards ofd when shared, else NULL */
        int ofd;                        /* where extracted events are written */

        unsigned long events_processed; /* events written so far */
        struct device_information *device;      /* device being read */
};
136
/* State of one traced block device. */
struct device_information {
        int fd;                         /* descriptor from open() on path */
        char *path;                     /* device path from the command line */
        char buts_name[32];             /* name copied from the trace setup */
        int trace_started;              /* non-zero once BLKSTARTTRACE succeeded */
        struct thread_information *threads;     /* first of ncpus reader slots */
};
144
/* reader threads: ncpus slots per device, ndevs devices */
static int ncpus;
static struct thread_information *thread_information;

/* traced devices, grown one at a time by resize_devices() */
static int ndevs;
static struct device_information *device_information;

/* command line option globals */
static char *relay_path;
static char *output_name;
static int act_mask = ~0U;
static int kill_running_trace;

/* set from the signal handler; polled by main and by every reader */
static volatile int done;
#define is_done()       (*(volatile int *)(&done))

/* serialises writers when every reader shares stdout */
static pthread_mutex_t stdout_mutex = PTHREAD_MUTEX_INITIALIZER;

static void exit_trace(int status);
162
163 static int find_mask_map(char *string)
164 {
165         int i;
166
167         for (i = 0; i < sizeof(mask_maps)/sizeof(mask_maps[0]); i++)
168                 if (COMPARE_MASK_MAP(&mask_maps[i], string))
169                         return mask_maps[i].mask;
170
171         return -1;
172 }
173
174 static int start_trace(struct device_information *dip)
175 {
176         struct blk_user_trace_setup buts;
177
178         memset(&buts, 0, sizeof(buts));
179         buts.buf_size = BUF_SIZE;
180         buts.buf_nr = BUF_NR;
181         buts.act_mask = act_mask;
182
183         if (ioctl(dip->fd, BLKSTARTTRACE, &buts) < 0) {
184                 perror("BLKSTARTTRACE");
185                 return 1;
186         }
187
188         memcpy(dip->buts_name, buts.name, sizeof(dip->buts_name));
189         dip->trace_started = 1;
190         return 0;
191 }
192
193 static void stop_trace(struct device_information *dip)
194 {
195         if (dip->trace_started || kill_running_trace) {
196                 if (ioctl(dip->fd, BLKSTOPTRACE) < 0)
197                         perror("BLKSTOPTRACE");
198                 close(dip->fd);
199                 dip->trace_started = 0;
200         }
201 }
202
203 static void stop_all_traces(void)
204 {
205         struct device_information *dip;
206         int i;
207
208         for (dip = device_information, i = 0; i < ndevs; i++, dip++)
209                 stop_trace(dip);
210 }
211
212 static void *extract_data(struct thread_information *tip, char *ofn, int nb)
213 {
214         int ret, bytes_left;
215         unsigned char *buf, *p;
216
217         buf = malloc(nb);
218         p = buf;
219         bytes_left = nb;
220         while (bytes_left > 0) {
221                 ret = read(tip->fd, p, bytes_left);
222                 if (!ret)
223                         usleep(1000);
224                 else if (ret < 0) {
225                         perror(tip->fn);
226                         fprintf(stderr, "Thread %d extract_data %s failed\n",
227                                 tip->cpu, tip->fn);
228                         free(buf);
229                         exit_trace(1);
230                         return NULL;
231                 } else {
232                         p += ret;
233                         bytes_left -= ret;
234                 }
235         }
236
237         return buf;
238 }
239
240 static inline void tip_fd_unlock(struct thread_information *tip)
241 {
242         if (tip->fd_lock)
243                 pthread_mutex_unlock(tip->fd_lock);
244 }
245
246 static inline void tip_fd_lock(struct thread_information *tip)
247 {
248         if (tip->fd_lock)
249                 pthread_mutex_lock(tip->fd_lock);
250 }
251
252 static void *extract(void *arg)
253 {
254         struct thread_information *tip = arg;
255         int ret, pdu_len;
256         char dp[64], *pdu_data;
257         struct blk_io_trace t;
258         pid_t pid = getpid();
259         cpu_set_t cpu_mask;
260
261         CPU_ZERO(&cpu_mask);
262         CPU_SET((tip->cpu), &cpu_mask);
263
264         if (sched_setaffinity(pid, sizeof(cpu_mask), &cpu_mask) == -1) {
265                 perror("sched_setaffinity");
266                 exit_trace(1);
267         }
268
269         snprintf(tip->fn, sizeof(tip->fn), "%s/block/%s/trace%d",
270                         relay_path, tip->device->buts_name, tip->cpu);
271         tip->fd = open(tip->fn, O_RDONLY);
272         if (tip->fd < 0) {
273                 perror(tip->fn);
274                 fprintf(stderr,"Thread %d failed open of %s\n", tip->cpu,
275                         tip->fn);
276                 exit_trace(1);
277         }
278
279         pdu_data = NULL;
280         while (!is_done()) {
281                 ret = read(tip->fd, &t, sizeof(t));
282                 if (ret != sizeof(t)) {
283                         if (ret < 0) {
284                                 perror(tip->fn);
285                                 fprintf(stderr,"Thread %d failed read of %s\n",
286                                         tip->cpu, tip->fn);
287                                 exit_trace(1);
288                         } else if (ret > 0) {
289                                 fprintf(stderr,"Thread %d misread %s %d,%d\n",
290                                         tip->cpu, tip->fn, ret, (int)sizeof(t));
291                                 exit_trace(1);
292                         } else {
293                                 usleep(10000);
294                                 continue;
295                         }
296                 }
297
298                 if (verify_trace(&t))
299                         exit_trace(1);
300
301                 pdu_len = t.pdu_len;
302
303                 trace_to_be(&t);
304
305                 if (pdu_len)
306                         pdu_data = extract_data(tip, dp, pdu_len);
307
308                 /*
309                  * now we have both trace and payload, get a lock on the
310                  * output descriptor and send it off
311                  */
312                 tip_fd_lock(tip);
313
314                 ret = write(tip->ofd, &t, sizeof(t));
315                 if (ret < 0) {
316                         fprintf(stderr,"Thread %d failed write\n", tip->cpu);
317                         tip_fd_unlock(tip);
318                         exit_trace(1);
319                 }
320
321                 if (pdu_data) {
322                         ret = write(tip->ofd, pdu_data, pdu_len);
323                         if (ret != pdu_len) {
324                                 perror("write pdu data");
325                                 exit_trace(1);
326                         }
327
328                         free(pdu_data);
329                         pdu_data = NULL;
330                 }
331
332                 tip_fd_unlock(tip);
333                 tip->events_processed++;
334         }
335
336         return NULL;
337 }
338
339 static int start_threads(struct device_information *dip)
340 {
341         struct thread_information *tip;
342         char op[64];
343         int j, pipeline = output_name && !strcmp(output_name, "-");
344
345         for (tip = dip->threads, j = 0; j < ncpus; j++, tip++) {
346                 tip->cpu = j;
347                 tip->device = dip;
348                 tip->fd_lock = NULL;
349                 tip->events_processed = 0;
350
351                 if (pipeline) {
352                         tip->ofd = dup(STDOUT_FILENO);
353                         tip->fd_lock = &stdout_mutex;
354                 } else {
355                         if (output_name)
356                                 sprintf(op, "%s_%s_out.%d", output_name,
357                                         dip->buts_name, tip->cpu);
358                         else
359                                 sprintf(op, "%s_out.%d",
360                                         dip->buts_name, tip->cpu);
361                         tip->ofd = open(op, O_CREAT|O_TRUNC|O_WRONLY, 0644);
362                 }
363
364                 if (tip->ofd < 0) {
365                         perror(op);
366                         return 1;
367                 }
368
369                 if (pthread_create(&tip->thread, NULL, extract, tip)) {
370                         perror("pthread_create");
371                         close(tip->ofd);
372                         return 1;
373                 }
374         }
375
376         return 0;
377 }
378
379 static void close_thread(struct thread_information *tip)
380 {
381         if (tip->fd != -1)
382                 close(tip->fd);
383         if (tip->ofd != -1)
384                 close(tip->ofd);
385         tip->fd = tip->ofd = -1;
386 }
387
388 static void stop_threads(struct device_information *dip)
389 {
390         struct thread_information *tip;
391         long ret;
392         int j;
393
394         for (tip = dip->threads, j = 0; j < ncpus; j++, tip++) {
395                 if (pthread_join(tip->thread, (void *) &ret))
396                         perror("thread_join");
397                 close_thread(tip);
398         }
399 }
400
401 static void stop_all_threads(void)
402 {
403         struct device_information *dip;
404         int i;
405
406         for (dip = device_information, i = 0; i < ndevs; i++, dip++)
407                 stop_threads(dip);
408 }
409
410 static void stop_all_tracing(void)
411 {
412         struct device_information *dip;
413         struct thread_information *tip;
414         int i, j;
415
416         for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
417                 for (tip = dip->threads, j = 0; j < ncpus; j++, tip++)
418                         close_thread(tip);
419                 stop_trace(dip);
420         }
421 }
422
/*
 * Shut down all tracing and terminate the process with the given exit
 * status.  Does not return.
 */
static void exit_trace(int status)
{
        stop_all_tracing();

        exit(status);
}
428
429 static int resize_devices(char *path)
430 {
431         int size = (ndevs + 1) * sizeof(struct device_information);
432
433         device_information = realloc(device_information, size);
434         if (!device_information) {
435                 fprintf(stderr, "Out of memory, device %s (%d)\n", path, size);
436                 return 1;
437         }
438         device_information[ndevs].path = path;
439         ndevs++;
440         return 0;
441 }
442
443 static int open_devices(void)
444 {
445         struct device_information *dip;
446         int i;
447
448         for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
449                 dip->fd = open(dip->path, O_RDONLY);
450                 if (dip->fd < 0) {
451                         perror(dip->path);
452                         return 1;
453                 }
454         }
455         return 0;
456 }
457
458 static int start_devices(void)
459 {
460         struct device_information *dip;
461         int i, j, size;
462
463         size = ncpus * sizeof(struct thread_information);
464         thread_information = malloc(size * ndevs);
465         if (!thread_information) {
466                 fprintf(stderr, "Out of memory, threads (%d)\n", size * ndevs);
467                 return 1;
468         }
469
470         for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
471                 if (start_trace(dip)) {
472                         close(dip->fd);
473                         fprintf(stderr, "Failed to start trace on %s\n",
474                                 dip->path);
475                         break;
476                 }
477         }
478         if (i != ndevs) {
479                 for (dip = device_information, j = 0; j < i; j++, dip++)
480                         stop_trace(dip);
481                 return 1;
482         }
483
484         for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
485                 dip->threads = thread_information + (i * ncpus);
486                 if (start_threads(dip)) {
487                         fprintf(stderr, "Failed to start worker threads\n");
488                         break;
489                 }
490         }
491         if (i != ndevs) {
492                 for (dip = device_information, j = 0; j < i; j++, dip++)
493                         stop_threads(dip);
494                 for (dip = device_information, i = 0; i < ndevs; i++, dip++)
495                         stop_trace(dip);
496                 return 1;
497         }
498
499         return 0;
500 }
501
502 static void show_stats(void)
503 {
504         int i, j;
505         struct device_information *dip;
506         struct thread_information *tip;
507         unsigned long long events_processed;
508   
509         if (output_name && !strcmp(output_name, "-"))
510                 return;
511
512         for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
513                 printf("Device: %s\n", dip->path);
514                 events_processed = 0;
515                 for (tip = dip->threads, j = 0; j < ncpus; j++, tip++) {
516                         printf("  CPU%3d: %20ld events\n",
517                                tip->cpu, tip->events_processed);
518                         events_processed += tip->events_processed;
519                 }
520                 printf("  Total:  %20lld events\n", events_processed);
521         }
522 }
523   
/* Print a one-line usage summary for program to stderr. */
static void show_usage(char *program)
{
        static const char usage_fmt[] =
                "Usage: %s [-d <dev>] [-a <trace> [-a <trace>]] <dev>\n";

        fprintf(stderr, usage_fmt, program);
}
530
531 static void handle_sigint(int sig)
532 {
533         done = 1;
534 }
535
536 int main(int argc, char *argv[])
537 {
538         static char default_relay_path[] = "/relay";
539         struct statfs st;
540         int i, c;
541         int stop_watch = 0;
542         int act_mask_tmp = 0;
543
544         while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) >= 0) {
545                 switch (c) {
546                 case 'a':
547                         i = find_mask_map(optarg);
548                         if (i < 0) {
549                                 fprintf(stderr,"Invalid action mask %s\n", 
550                                         optarg);
551                                 return 1;
552                         }
553                         act_mask_tmp |= i;
554                         break;
555
556                 case 'A':
557                         if ((sscanf(optarg, "%x", &i) != 1) || !VALID_SET(i)) {
558                                 fprintf(stderr,
559                                         "Invalid set action mask %s/0x%x\n", 
560                                         optarg, i);
561                                 return 1;
562                         }
563                         act_mask_tmp = i;
564                         break;
565
566                 case 'd':
567                         if (resize_devices(optarg) != 0)
568                                 return 1;
569                         break;
570
571                 case 'r':
572                         relay_path = optarg;
573                         break;
574
575                 case 'o':
576                         output_name = optarg;
577                         break;
578                 case 'k':
579                         kill_running_trace = 1;
580                         break;
581                 case 'w':
582                         stop_watch = atoi(optarg);
583                         if (stop_watch <= 0) {
584                                 fprintf(stderr,
585                                         "Invalid stopwatch value (%d secs)\n",
586                                         stop_watch);
587                                 return 1;
588                         }
589                         break;
590
591                 default:
592                         show_usage(argv[0]);
593                         return 1;
594                 }
595         }
596
597         while (optind < argc) {
598                 if (resize_devices(argv[optind++]) != 0)
599                         return 1;
600         }
601
602         if (ndevs == 0) {
603                 show_usage(argv[0]);
604                 return 1;
605         }
606
607         if (!relay_path)
608                 relay_path = default_relay_path;
609
610         if (act_mask_tmp != 0)
611                 act_mask = act_mask_tmp;
612
613         if (statfs(relay_path, &st) < 0) {
614                 perror("statfs");
615                 fprintf(stderr,"%s does not appear to be a valid path\n",
616                         relay_path);
617                 return 1;
618         } else if (st.f_type != RELAYFS_TYPE) {
619                 fprintf(stderr,"%s does not appear to be a relay filesystem\n",
620                         relay_path);
621                 return 1;
622         }
623
624         if (open_devices() != 0)
625                 return 1;
626
627         if (kill_running_trace) {
628                 stop_all_traces();
629                 return 0;
630         }
631
632         setlocale(LC_NUMERIC, "en_US");
633
634         ncpus = sysconf(_SC_NPROCESSORS_ONLN);
635         if (ncpus < 0) {
636                 fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed\n");
637                 return 1;
638         }
639
640         if (start_devices() != 0)
641                 return 1;
642
643         signal(SIGINT, handle_sigint);
644         signal(SIGHUP, handle_sigint);
645         signal(SIGTERM, handle_sigint);
646         signal(SIGALRM, handle_sigint);
647
648         atexit(stop_all_tracing);
649
650         if (stop_watch)
651                 alarm(stop_watch);
652
653         while (!is_done())
654                 sleep(1);
655
656         stop_all_threads();
657         stop_all_traces();
658         show_stats();
659
660         return 0;
661 }
662