[PATCH] blkparse: kill old force debug print
[blktrace.git] / blktrace.c
1 /*
2  * block queue tracing application
3  *
4  * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5  *
6  *  This program is free software; you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License as published by
8  *  the Free Software Foundation; either version 2 of the License, or
9  *  (at your option) any later version.
10  *
11  *  This program is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU General Public License for more details.
15  *
16  *  You should have received a copy of the GNU General Public License
17  *  along with this program; if not, write to the Free Software
18  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19  *
20  */
21 #include <pthread.h>
22 #include <sys/types.h>
23 #include <sys/stat.h>
24 #include <unistd.h>
25 #include <locale.h>
26 #include <signal.h>
27 #include <fcntl.h>
28 #include <string.h>
29 #include <sys/ioctl.h>
30 #include <sys/param.h>
31 #include <sys/statfs.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <sched.h>
35 #include <ctype.h>
36 #include <getopt.h>
37 #include <sys/mman.h>
38
39 #include "blktrace.h"
40
41 static char blktrace_version[] = "0.90";
42
43 #define BUF_SIZE        (128 *1024)
44 #define BUF_NR          (4)
45
46 #define RELAYFS_TYPE    0xF0B4A981
47
48 #define DECLARE_MASK_MAP(mask)          { BLK_TC_##mask, #mask, "BLK_TC_"#mask }
49 #define COMPARE_MASK_MAP(mmp, str)                                      \
50         (!strcasecmp((mmp)->short_form, (str)) ||                      \
51          !strcasecmp((mmp)->long_form, (str)))
52
53 #define VALID_SET(x)    ((1 <= (x)) && ((x) < (1 << BLK_TC_SHIFT)))
54
55 struct mask_map {
56         int mask;
57         char *short_form;
58         char *long_form;
59 };
60
61 static struct mask_map mask_maps[] = {
62         DECLARE_MASK_MAP(READ),
63         DECLARE_MASK_MAP(WRITE),
64         DECLARE_MASK_MAP(BARRIER),
65         DECLARE_MASK_MAP(SYNC),
66         DECLARE_MASK_MAP(QUEUE),
67         DECLARE_MASK_MAP(REQUEUE),
68         DECLARE_MASK_MAP(ISSUE),
69         DECLARE_MASK_MAP(COMPLETE),
70         DECLARE_MASK_MAP(FS),
71         DECLARE_MASK_MAP(PC),
72 };
73
74 #define S_OPTS  "d:a:A:r:o:kw:vb:n:"
75 static struct option l_opts[] = {
76         {
77                 .name = "dev",
78                 .has_arg = required_argument,
79                 .flag = NULL,
80                 .val = 'd'
81         },
82         {
83                 .name = "act-mask",
84                 .has_arg = required_argument,
85                 .flag = NULL,
86                 .val = 'a'
87         },
88         {
89                 .name = "set-mask",
90                 .has_arg = required_argument,
91                 .flag = NULL,
92                 .val = 'A'
93         },
94         {
95                 .name = "relay",
96                 .has_arg = required_argument,
97                 .flag = NULL,
98                 .val = 'r'
99         },
100         {
101                 .name = "output",
102                 .has_arg = required_argument,
103                 .flag = NULL,
104                 .val = 'o'
105         },
106         {
107                 .name = "kill",
108                 .has_arg = no_argument,
109                 .flag = NULL,
110                 .val = 'k'
111         },
112         {
113                 .name = "stopwatch",
114                 .has_arg = required_argument,
115                 .flag = NULL,
116                 .val = 'w'
117         },
118         {
119                 .name = "version",
120                 .has_arg = no_argument,
121                 .flag = NULL,
122                 .val = 'v'
123         },
124         {
125                 .name = "buffer size",
126                 .has_arg = required_argument,
127                 .flag = NULL,
128                 .val = 'b'
129         },
130         {
131                 .name = "nr of sub buffers",
132                 .has_arg = required_argument,
133                 .flag = NULL,
134                 .val = 'n'
135         },
136 };
137
138 struct thread_information {
139         int cpu;
140         pthread_t thread;
141
142         int fd;
143         char fn[MAXPATHLEN + 64];
144         void *buf;
145         unsigned long buf_offset;
146         unsigned int buf_subbuf;
147         unsigned int sequence;
148
149         pthread_mutex_t *fd_lock;
150         int ofd;
151
152         unsigned long events_processed;
153         struct device_information *device;
154 };
155
156 struct device_information {
157         int fd;
158         char *path;
159         char buts_name[32];
160         int trace_started;
161         struct thread_information *threads;
162 };
163
164 static int ncpus;
165 static struct thread_information *thread_information;
166 static int ndevs;
167 static struct device_information *device_information;
168
169 /* command line option globals */
170 static char *relay_path;
171 static char *output_name;
172 static int act_mask = ~0U;
173 static int kill_running_trace;
174 static int use_mmap;
175 static unsigned int buf_size = BUF_SIZE;
176 static unsigned int buf_nr = BUF_NR;
177
178 #define is_done()       (*(volatile int *)(&done))
179 static volatile int done;
180
181 static pthread_mutex_t stdout_mutex = PTHREAD_MUTEX_INITIALIZER;
182
183 static void exit_trace(int status);
184
185 static int find_mask_map(char *string)
186 {
187         unsigned int i;
188
189         for (i = 0; i < sizeof(mask_maps)/sizeof(mask_maps[0]); i++)
190                 if (COMPARE_MASK_MAP(&mask_maps[i], string))
191                         return mask_maps[i].mask;
192
193         return -1;
194 }
195
196 static int start_trace(struct device_information *dip)
197 {
198         struct blk_user_trace_setup buts;
199
200         memset(&buts, 0, sizeof(buts));
201         buts.buf_size = buf_size;
202         buts.buf_nr = buf_nr;
203         buts.act_mask = act_mask;
204
205         if (ioctl(dip->fd, BLKSTARTTRACE, &buts) < 0) {
206                 perror("BLKSTARTTRACE");
207                 return 1;
208         }
209
210         memcpy(dip->buts_name, buts.name, sizeof(dip->buts_name));
211         dip->trace_started = 1;
212         return 0;
213 }
214
215 static void stop_trace(struct device_information *dip)
216 {
217         if (dip->trace_started || kill_running_trace) {
218                 if (ioctl(dip->fd, BLKSTOPTRACE) < 0)
219                         perror("BLKSTOPTRACE");
220                 close(dip->fd);
221                 dip->trace_started = 0;
222         }
223 }
224
225 static void stop_all_traces(void)
226 {
227         struct device_information *dip;
228         int i;
229
230         for (dip = device_information, i = 0; i < ndevs; i++, dip++)
231                 stop_trace(dip);
232 }
233
234 static int get_data_read(struct thread_information *tip, void *buf, int len)
235 {
236         char *p = buf;
237         int ret, bytes_left = len;
238
239         while (!is_done() && bytes_left > 0) {
240                 ret = read(tip->fd, p, bytes_left);
241                 if (ret == len)
242                         return 0;
243
244                 if (ret < 0) {
245                         perror(tip->fn);
246                         fprintf(stderr,"Thread %d failed read of %s\n",
247                                 tip->cpu, tip->fn);
248                         exit_trace(1);
249                 } else if (ret > 0) {
250                         fprintf(stderr,"Thread %d misread %s %d,%d\n",
251                                 tip->cpu, tip->fn, ret, len);
252                         exit_trace(1);
253                 } else {
254                         p += ret;
255                         bytes_left -= ret;
256                 }
257
258                 usleep(10000);
259         }
260
261         return -1;
262 }
263
264 static int get_data_mmap(struct thread_information *tip, void *buf,
265                          unsigned int len, int check_magic)
266 {
267         if (len > (buf_size * (tip->buf_subbuf + 1)) - tip->buf_offset) {
268                 tip->buf_subbuf++;
269                 if (tip->buf_subbuf == buf_nr)
270                         tip->buf_subbuf = 0;
271
272                 tip->buf_offset = tip->buf_subbuf * buf_size;
273         }
274
275         while (1) {
276                 struct blk_io_trace *t = buf;
277
278                 memcpy(buf, tip->buf + tip->buf_offset, len);
279
280                 if (!check_magic)
281                         break;
282
283                 if (CHECK_MAGIC(t) && t->sequence >= tip->sequence) {
284                         tip->sequence = t->sequence;
285                         break;
286                 }
287         
288                 if (is_done())
289                         return -1;
290
291                 usleep(10000);
292         }
293
294         tip->buf_offset += len;
295         return 0;
296 }
297
298 static int get_data(struct thread_information *tip, void *buf, int len,
299                     int check_magic)
300 {
301         if (tip->buf)
302                 return get_data_mmap(tip, buf, len, check_magic);
303         else
304                 return get_data_read(tip, buf, len);
305 }
306
307 static void *extract_data(struct thread_information *tip, int nb)
308 {
309         unsigned char *buf;
310
311         buf = malloc(nb);
312         if (!get_data(tip, buf, nb, 0))
313                 return buf;
314
315         free(buf);
316         exit_trace(1);
317         return NULL;
318 }
319
320 static inline void tip_fd_unlock(struct thread_information *tip)
321 {
322         if (tip->fd_lock)
323                 pthread_mutex_unlock(tip->fd_lock);
324 }
325
326 static inline void tip_fd_lock(struct thread_information *tip)
327 {
328         if (tip->fd_lock)
329                 pthread_mutex_lock(tip->fd_lock);
330 }
331
332 static void *extract(void *arg)
333 {
334         struct thread_information *tip = arg;
335         int ret, pdu_len;
336         char *pdu_data;
337         struct blk_io_trace t;
338         pid_t pid = getpid();
339         cpu_set_t cpu_mask;
340
341         CPU_ZERO(&cpu_mask);
342         CPU_SET((tip->cpu), &cpu_mask);
343
344         if (sched_setaffinity(pid, sizeof(cpu_mask), &cpu_mask) == -1) {
345                 perror("sched_setaffinity");
346                 exit_trace(1);
347         }
348
349         snprintf(tip->fn, sizeof(tip->fn), "%s/block/%s/trace%d",
350                         relay_path, tip->device->buts_name, tip->cpu);
351         tip->fd = open(tip->fn, O_RDONLY);
352         if (tip->fd < 0) {
353                 perror(tip->fn);
354                 fprintf(stderr,"Thread %d failed open of %s\n", tip->cpu,
355                         tip->fn);
356                 exit_trace(1);
357         }
358
359         if (use_mmap) {
360                 tip->buf = mmap(NULL, buf_size * buf_nr, PROT_READ,
361                                         MAP_PRIVATE | MAP_POPULATE, tip->fd, 0);
362                 if (tip->buf == MAP_FAILED) {
363                         perror("mmap");
364                         exit_trace(1);
365                 }
366         }
367
368         pdu_data = NULL;
369         while (!is_done()) {
370                 if (get_data(tip, &t, sizeof(t), 1))
371                         break;
372
373                 if (verify_trace(&t))
374                         exit_trace(1);
375
376                 pdu_len = t.pdu_len;
377
378                 trace_to_be(&t);
379
380                 if (pdu_len)
381                         pdu_data = extract_data(tip, pdu_len);
382
383                 /*
384                  * now we have both trace and payload, get a lock on the
385                  * output descriptor and send it off
386                  */
387                 tip_fd_lock(tip);
388
389                 ret = write(tip->ofd, &t, sizeof(t));
390                 if (ret < 0) {
391                         fprintf(stderr,"Thread %d failed write\n", tip->cpu);
392                         tip_fd_unlock(tip);
393                         exit_trace(1);
394                 }
395
396                 if (pdu_data) {
397                         ret = write(tip->ofd, pdu_data, pdu_len);
398                         if (ret != pdu_len) {
399                                 perror("write pdu data");
400                                 tip_fd_unlock(tip);
401                                 exit_trace(1);
402                         }
403
404                         free(pdu_data);
405                         pdu_data = NULL;
406                 }
407
408                 tip_fd_unlock(tip);
409                 tip->events_processed++;
410         }
411
412         return NULL;
413 }
414
415 static int start_threads(struct device_information *dip)
416 {
417         struct thread_information *tip;
418         char op[64];
419         int j, pipeline = output_name && !strcmp(output_name, "-");
420
421         for (tip = dip->threads, j = 0; j < ncpus; j++, tip++) {
422                 tip->cpu = j;
423                 tip->device = dip;
424                 tip->fd_lock = NULL;
425                 tip->events_processed = 0;
426
427                 if (pipeline) {
428                         tip->ofd = dup(STDOUT_FILENO);
429                         tip->fd_lock = &stdout_mutex;
430                 } else {
431                         if (output_name) {
432                                 sprintf(op, "%s.blktrace.%d", output_name,
433                                         tip->cpu);
434                         } else {
435                                 sprintf(op, "%s.blktrace.%d",
436                                         dip->buts_name, tip->cpu);
437                         }
438                         tip->ofd = open(op, O_CREAT|O_TRUNC|O_WRONLY, 0644);
439                 }
440
441                 if (tip->ofd < 0) {
442                         perror(op);
443                         return 1;
444                 }
445
446                 if (pthread_create(&tip->thread, NULL, extract, tip)) {
447                         perror("pthread_create");
448                         close(tip->ofd);
449                         return 1;
450                 }
451         }
452
453         return 0;
454 }
455
456 static void close_thread(struct thread_information *tip)
457 {
458         if (tip->buf)
459                 munmap(tip->buf, buf_size * buf_nr);
460
461         if (tip->fd != -1)
462                 close(tip->fd);
463         if (tip->ofd != -1)
464                 close(tip->ofd);
465
466         tip->fd = tip->ofd = -1;
467 }
468
469 static void stop_threads(struct device_information *dip)
470 {
471         struct thread_information *tip;
472         long ret;
473         int j;
474
475         for (tip = dip->threads, j = 0; j < ncpus; j++, tip++) {
476                 if (pthread_join(tip->thread, (void *) &ret))
477                         perror("thread_join");
478                 close_thread(tip);
479         }
480 }
481
482 static void stop_all_threads(void)
483 {
484         struct device_information *dip;
485         int i;
486
487         for (dip = device_information, i = 0; i < ndevs; i++, dip++)
488                 stop_threads(dip);
489 }
490
491 static void stop_all_tracing(void)
492 {
493         struct device_information *dip;
494         struct thread_information *tip;
495         int i, j;
496
497         for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
498                 for (tip = dip->threads, j = 0; j < ncpus; j++, tip++)
499                         close_thread(tip);
500                 stop_trace(dip);
501         }
502 }
503
504 static void exit_trace(int status)
505 {
506         stop_all_tracing();
507         exit(status);
508 }
509
510 static int resize_devices(char *path)
511 {
512         int size = (ndevs + 1) * sizeof(struct device_information);
513
514         device_information = realloc(device_information, size);
515         if (!device_information) {
516                 fprintf(stderr, "Out of memory, device %s (%d)\n", path, size);
517                 return 1;
518         }
519         device_information[ndevs].path = path;
520         ndevs++;
521         return 0;
522 }
523
524 static int open_devices(void)
525 {
526         struct device_information *dip;
527         int i;
528
529         for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
530                 dip->fd = open(dip->path, O_RDONLY);
531                 if (dip->fd < 0) {
532                         perror(dip->path);
533                         return 1;
534                 }
535         }
536         return 0;
537 }
538
539 static int start_devices(void)
540 {
541         struct device_information *dip;
542         int i, j, size;
543
544         size = ncpus * sizeof(struct thread_information);
545         thread_information = malloc(size * ndevs);
546         if (!thread_information) {
547                 fprintf(stderr, "Out of memory, threads (%d)\n", size * ndevs);
548                 return 1;
549         }
550
551         for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
552                 if (start_trace(dip)) {
553                         close(dip->fd);
554                         fprintf(stderr, "Failed to start trace on %s\n",
555                                 dip->path);
556                         break;
557                 }
558         }
559         if (i != ndevs) {
560                 for (dip = device_information, j = 0; j < i; j++, dip++)
561                         stop_trace(dip);
562                 return 1;
563         }
564
565         for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
566                 dip->threads = thread_information + (i * ncpus);
567                 if (start_threads(dip)) {
568                         fprintf(stderr, "Failed to start worker threads\n");
569                         break;
570                 }
571         }
572         if (i != ndevs) {
573                 for (dip = device_information, j = 0; j < i; j++, dip++)
574                         stop_threads(dip);
575                 for (dip = device_information, i = 0; i < ndevs; i++, dip++)
576                         stop_trace(dip);
577                 return 1;
578         }
579
580         return 0;
581 }
582
583 static void show_stats(void)
584 {
585         int i, j;
586         struct device_information *dip;
587         struct thread_information *tip;
588         unsigned long long events_processed;
589
590         if (output_name && !strcmp(output_name, "-"))
591                 return;
592
593         for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
594                 printf("Device: %s\n", dip->path);
595                 events_processed = 0;
596                 for (tip = dip->threads, j = 0; j < ncpus; j++, tip++) {
597                         printf("  CPU%3d: %20ld events\n",
598                                tip->cpu, tip->events_processed);
599                         events_processed += tip->events_processed;
600                 }
601                 printf("  Total:  %20lld events\n", events_processed);
602         }
603 }
604
605 static char usage_str[] = \
606         "-d <dev> [ -r relay path ] [ -o <output> ] [-k ] [ -w time ]\n" \
607         "[ -a action ] [ -A action mask ] [ -v ]\n\n" \
608         "\t-d Use specified device. May also be given last after options\n" \
609         "\t-r Path to mounted relayfs, defaults to /relay\n" \
610         "\t-o File(s) to send output to\n" \
611         "\t-k Kill a running trace\n" \
612         "\t-w Stop after defined time, in seconds\n" \
613         "\t-a Only trace specified actions. See documentation\n" \
614         "\t-A Give trace mask as a single value. See documentation\n" \
615         "\t-b Sub buffer size in KiB\n" \
616         "\t-n Number of sub buffers\n" \
617         "\t-v Print program version info\n\n";
618
619 static void show_usage(char *program)
620 {
621         fprintf(stderr, "Usage: %s %s %s",program, blktrace_version, usage_str);
622 }
623
624 static void handle_sigint(__attribute__((__unused__)) int sig)
625 {
626         done = 1;
627 }
628
629 int main(int argc, char *argv[])
630 {
631         static char default_relay_path[] = "/relay";
632         struct statfs st;
633         int i, c;
634         int stop_watch = 0;
635         int act_mask_tmp = 0;
636
637         while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) >= 0) {
638                 switch (c) {
639                 case 'a':
640                         i = find_mask_map(optarg);
641                         if (i < 0) {
642                                 fprintf(stderr,"Invalid action mask %s\n",
643                                         optarg);
644                                 return 1;
645                         }
646                         act_mask_tmp |= i;
647                         break;
648
649                 case 'A':
650                         if ((sscanf(optarg, "%x", &i) != 1) || !VALID_SET(i)) {
651                                 fprintf(stderr,
652                                         "Invalid set action mask %s/0x%x\n",
653                                         optarg, i);
654                                 return 1;
655                         }
656                         act_mask_tmp = i;
657                         break;
658
659                 case 'd':
660                         if (resize_devices(optarg) != 0)
661                                 return 1;
662                         break;
663
664                 case 'r':
665                         relay_path = optarg;
666                         break;
667
668                 case 'o':
669                         output_name = optarg;
670                         break;
671                 case 'k':
672                         kill_running_trace = 1;
673                         break;
674                 case 'w':
675                         stop_watch = atoi(optarg);
676                         if (stop_watch <= 0) {
677                                 fprintf(stderr,
678                                         "Invalid stopwatch value (%d secs)\n",
679                                         stop_watch);
680                                 return 1;
681                         }
682                         break;
683                 case 'v':
684                         printf("%s version %s\n", argv[0], blktrace_version);
685                         return 0;
686                 case 'b':
687                         buf_size = atoi(optarg);
688                         if (buf_size <= 0) {
689                                 fprintf(stderr,
690                                         "Invalid buffer size (%d)\n", buf_size);
691                                 return 1;
692                         }
693                         buf_size <<= 10;
694                         break;
695                 case 'n':
696                         buf_nr = atoi(optarg);
697                         if (buf_nr <= 0) {
698                                 fprintf(stderr,
699                                         "Invalid buffer nr (%d)\n", buf_nr);
700                                 return 1;
701                         }
702                         break;
703                 default:
704                         show_usage(argv[0]);
705                         return 1;
706                 }
707         }
708
709         while (optind < argc) {
710                 if (resize_devices(argv[optind++]) != 0)
711                         return 1;
712         }
713
714         if (ndevs == 0) {
715                 show_usage(argv[0]);
716                 return 1;
717         }
718
719         if (!relay_path)
720                 relay_path = default_relay_path;
721
722         if (act_mask_tmp != 0)
723                 act_mask = act_mask_tmp;
724
725         if (statfs(relay_path, &st) < 0) {
726                 perror("statfs");
727                 fprintf(stderr,"%s does not appear to be a valid path\n",
728                         relay_path);
729                 return 1;
730         } else if (st.f_type != RELAYFS_TYPE) {
731                 fprintf(stderr,"%s does not appear to be a relay filesystem\n",
732                         relay_path);
733                 return 1;
734         }
735
736         if (open_devices() != 0)
737                 return 1;
738
739         if (kill_running_trace) {
740                 stop_all_traces();
741                 return 0;
742         }
743
744         setlocale(LC_NUMERIC, "en_US");
745
746         ncpus = sysconf(_SC_NPROCESSORS_ONLN);
747         if (ncpus < 0) {
748                 fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed\n");
749                 return 1;
750         }
751
752         if (start_devices() != 0)
753                 return 1;
754
755         signal(SIGINT, handle_sigint);
756         signal(SIGHUP, handle_sigint);
757         signal(SIGTERM, handle_sigint);
758         signal(SIGALRM, handle_sigint);
759
760         atexit(stop_all_tracing);
761
762         if (stop_watch)
763                 alarm(stop_watch);
764
765         while (!is_done())
766                 sleep(1);
767
768         stop_all_threads();
769         stop_all_traces();
770         show_stats();
771
772         return 0;
773 }
774