[PATCH] Add support for tracing multiple devices
[blktrace.git] / blktrace.c
1 /*
2  * block queue tracing application
3  *
4  * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5  *
6  *  This program is free software; you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License as published by
8  *  the Free Software Foundation; either version 2 of the License, or
9  *  (at your option) any later version.
10  *
11  *  This program is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU General Public License for more details.
15  *
16  *  You should have received a copy of the GNU General Public License
17  *  along with this program; if not, write to the Free Software
18  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19  *
20  */
21 #include <pthread.h>
22 #include <sys/types.h>
23 #include <sys/stat.h>
24 #include <unistd.h>
25 #include <locale.h>
26 #include <signal.h>
27 #include <fcntl.h>
28 #include <string.h>
29 #include <sys/ioctl.h>
30 #include <sys/param.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <sched.h>
34 #include <ctype.h>
35 #include <getopt.h>
36
37 #include "blktrace.h"
38
/* Buffer size and buffer count handed to BLKSTARTTRACE by start_trace(). */
#define BUF_SIZE	(128 * 1024)
#define BUF_NR		(4)

/* Expands to one mask_map initialiser: value, short name, long name. */
#define DECLARE_MASK_MAP(mask)	{ BLK_TC_##mask, #mask, "BLK_TC_" #mask }

/* Non-zero when str equals either name of *mmp, compared case-insensitively. */
#define COMPARE_MASK_MAP(mmp, str)					\
	(strcasecmp((mmp)->short_form, (str)) == 0 ||			\
	 strcasecmp((mmp)->long_form, (str)) == 0)

/* Non-zero when x lies in the range [1, 1 << BLK_TC_SHIFT). */
#define VALID_SET(x)	(((x) >= 1) && ((x) < (1 << BLK_TC_SHIFT)))
48
49 struct mask_map {
50         int mask;
51         char *short_form;
52         char *long_form;
53 };
54
55 struct mask_map mask_maps[] = {
56         DECLARE_MASK_MAP(READ),
57         DECLARE_MASK_MAP(WRITE),
58         DECLARE_MASK_MAP(BARRIER),
59         DECLARE_MASK_MAP(SYNC),
60         DECLARE_MASK_MAP(QUEUE),
61         DECLARE_MASK_MAP(REQUEUE),
62         DECLARE_MASK_MAP(ISSUE),
63         DECLARE_MASK_MAP(COMPLETE),
64         DECLARE_MASK_MAP(FS),
65         DECLARE_MASK_MAP(PC),
66 };
67
/* Short option string for getopt_long(); a trailing ':' means "takes a value". */
#define S_OPTS	"d:a:A:r:o:k"

/*
 * Long option table for getopt_long().  Fields are, in order:
 * name, has_arg, flag, val.  Every entry returns its short-option
 * character, so main() needs a single switch.
 */
static struct option l_opts[] = {
	{ "dev",	required_argument,	NULL,	'd' },
	{ "act-mask",	required_argument,	NULL,	'a' },
	{ "set-mask",	required_argument,	NULL,	'A' },
	{ "relay",	required_argument,	NULL,	'r' },
	{ "output",	required_argument,	NULL,	'o' },
	{ "kill",	no_argument,		NULL,	'k' },
	{ NULL,		no_argument,		NULL,	0 }	/* terminator */
};
113
/*
 * State of one worker thread.  start_threads() creates one worker per
 * CPU for each traced device; extract() is the thread body.
 */
struct thread_information {
	int cpu;			/* CPU number; also selects trace<cpu> */
	pthread_t thread;		/* handle from pthread_create() */

	int fd;				/* trace file opened by extract() */
	char fn[MAXPATHLEN + 64];	/* path of that trace file */

	pthread_mutex_t *fd_lock;	/* guards ofd when non-NULL */
	int ofd;			/* descriptor the events are written to */

	unsigned long events_processed;	/* events written so far */
	struct device_information *device;	/* device this worker serves */
};
127
/* State of one traced block device. */
struct device_information {
	int fd;			/* descriptor from open_devices() */
	char *path;		/* device path given on the command line */
	char buts_name[32];	/* name copied back from BLKSTARTTRACE */
	int trace_started;	/* set by start_trace(), cleared by stop_trace() */
	struct thread_information *threads;	/* ncpus worker slots */
};
135
/* Traced devices and the worker slots that belong to them. */
static int ndevs;
static struct device_information *device_information;
static int ncpus;
static struct thread_information *thread_information;

/* command line option globals */
static char *relay_path;
static char *output_name;
static int act_mask = ~0U;
static int kill_running_trace;

/* Shutdown flag: written by handle_sigint(), polled through is_done(). */
static volatile int done;
#define is_done()	(*(volatile int *)(&done))

/* Shared by all workers when output goes to stdout (see start_threads()). */
static pthread_mutex_t stdout_mutex = PTHREAD_MUTEX_INITIALIZER;

static void exit_trace(int status);
153
154 static int find_mask_map(char *string)
155 {
156         int i;
157
158         for (i = 0; i < sizeof(mask_maps)/sizeof(mask_maps[0]); i++)
159                 if (COMPARE_MASK_MAP(&mask_maps[i], string))
160                         return mask_maps[i].mask;
161
162         return -1;
163 }
164
165 static int start_trace(struct device_information *dip)
166 {
167         struct blk_user_trace_setup buts;
168
169         memset(&buts, 0, sizeof(buts));
170         buts.buf_size = BUF_SIZE;
171         buts.buf_nr = BUF_NR;
172         buts.act_mask = act_mask;
173
174         if (ioctl(dip->fd, BLKSTARTTRACE, &buts) < 0) {
175                 perror("BLKSTARTTRACE");
176                 return 1;
177         }
178
179         memcpy(dip->buts_name, buts.name, sizeof(dip->buts_name));
180         dip->trace_started = 1;
181         return 0;
182 }
183
184 static void stop_trace(struct device_information *dip)
185 {
186         if (dip->trace_started || kill_running_trace) {
187                 if (ioctl(dip->fd, BLKSTOPTRACE) < 0)
188                         perror("BLKSTOPTRACE");
189                 close(dip->fd);
190                 dip->trace_started = 0;
191         }
192 }
193
194 static void stop_all_traces(void)
195 {
196         struct device_information *dip;
197         int i;
198
199         for (dip = device_information, i = 0; i < ndevs; i++, dip++)
200                 stop_trace(dip);
201 }
202
203 static void *extract_data(struct thread_information *tip, char *ofn, int nb)
204 {
205         int ret, bytes_left;
206         unsigned char *buf, *p;
207
208         buf = malloc(nb);
209         p = buf;
210         bytes_left = nb;
211         while (bytes_left > 0) {
212                 ret = read(tip->fd, p, bytes_left);
213                 if (!ret)
214                         usleep(1000);
215                 else if (ret < 0) {
216                         perror(tip->fn);
217                         fprintf(stderr, "Thread %d extract_data %s failed\n",
218                                 tip->cpu, tip->fn);
219                         free(buf);
220                         exit_trace(1);
221                         return NULL;
222                 } else {
223                         p += ret;
224                         bytes_left -= ret;
225                 }
226         }
227
228         return buf;
229 }
230
231 static inline void tip_fd_unlock(struct thread_information *tip)
232 {
233         if (tip->fd_lock)
234                 pthread_mutex_unlock(tip->fd_lock);
235 }
236
237 static inline void tip_fd_lock(struct thread_information *tip)
238 {
239         if (tip->fd_lock)
240                 pthread_mutex_lock(tip->fd_lock);
241 }
242
243 static void *extract(void *arg)
244 {
245         struct thread_information *tip = arg;
246         int ret, pdu_len;
247         char dp[64], *pdu_data;
248         struct blk_io_trace t;
249         pid_t pid = getpid();
250         cpu_set_t cpu_mask;
251
252         CPU_ZERO(&cpu_mask);
253         CPU_SET((tip->cpu), &cpu_mask);
254
255         if (sched_setaffinity(pid, sizeof(cpu_mask), &cpu_mask) == -1) {
256                 perror("sched_setaffinity");
257                 exit_trace(1);
258         }
259
260         snprintf(tip->fn, sizeof(tip->fn), "%s/block/%s/trace%d",
261                         relay_path, tip->device->buts_name, tip->cpu);
262         tip->fd = open(tip->fn, O_RDONLY);
263         if (tip->fd < 0) {
264                 perror(tip->fn);
265                 fprintf(stderr,"Thread %d failed open of %s\n", tip->cpu,
266                         tip->fn);
267                 exit_trace(1);
268         }
269
270         pdu_data = NULL;
271         while (!is_done()) {
272                 ret = read(tip->fd, &t, sizeof(t));
273                 if (ret != sizeof(t)) {
274                         if (ret < 0) {
275                                 perror(tip->fn);
276                                 fprintf(stderr,"Thread %d failed read of %s\n",
277                                         tip->cpu, tip->fn);
278                                 exit_trace(1);
279                         } else if (ret > 0) {
280                                 fprintf(stderr,"Thread %d misread %s %d,%d\n",
281                                         tip->cpu, tip->fn, ret, (int)sizeof(t));
282                                 exit_trace(1);
283                         } else {
284                                 usleep(10000);
285                                 continue;
286                         }
287                 }
288
289                 if (verify_trace(&t))
290                         exit_trace(1);
291
292                 pdu_len = t.pdu_len;
293
294                 trace_to_be(&t);
295
296                 if (pdu_len)
297                         pdu_data = extract_data(tip, dp, pdu_len);
298
299                 /*
300                  * now we have both trace and payload, get a lock on the
301                  * output descriptor and send it off
302                  */
303                 tip_fd_lock(tip);
304
305                 ret = write(tip->ofd, &t, sizeof(t));
306                 if (ret < 0) {
307                         fprintf(stderr,"Thread %d failed write\n", tip->cpu);
308                         tip_fd_unlock(tip);
309                         exit_trace(1);
310                 }
311
312                 if (pdu_data) {
313                         ret = write(tip->ofd, pdu_data, pdu_len);
314                         if (ret != pdu_len) {
315                                 perror("write pdu data");
316                                 exit_trace(1);
317                         }
318
319                         free(pdu_data);
320                         pdu_data = NULL;
321                 }
322
323                 tip_fd_unlock(tip);
324                 tip->events_processed++;
325         }
326
327         return NULL;
328 }
329
330 static int start_threads(struct device_information *dip)
331 {
332         struct thread_information *tip;
333         char op[64];
334         int j, pipeline = output_name && !strcmp(output_name, "-");
335
336         for (tip = dip->threads, j = 0; j < ncpus; j++, tip++) {
337                 tip->cpu = j;
338                 tip->device = dip;
339                 tip->fd_lock = NULL;
340                 tip->events_processed = 0;
341
342                 if (pipeline) {
343                         tip->ofd = dup(STDOUT_FILENO);
344                         tip->fd_lock = &stdout_mutex;
345                 } else {
346                         if (output_name)
347                                 sprintf(op, "%s_%s_out.%d", output_name,
348                                         dip->buts_name, tip->cpu);
349                         else
350                                 sprintf(op, "%s_out.%d",
351                                         dip->buts_name, tip->cpu);
352                         tip->ofd = open(op, O_CREAT|O_TRUNC|O_WRONLY, 0644);
353                 }
354
355                 if (tip->ofd < 0) {
356                         perror(op);
357                         return 1;
358                 }
359
360                 if (pthread_create(&tip->thread, NULL, extract, tip)) {
361                         perror("pthread_create");
362                         close(tip->ofd);
363                         return 1;
364                 }
365         }
366
367         return 0;
368 }
369
370 static void close_thread(struct thread_information *tip)
371 {
372         if (tip->fd != -1)
373                 close(tip->fd);
374         if (tip->ofd != -1)
375                 close(tip->ofd);
376         tip->fd = tip->ofd = -1;
377 }
378
379 static void stop_threads(struct device_information *dip)
380 {
381         struct thread_information *tip;
382         long ret;
383         int j;
384
385         for (tip = dip->threads, j = 0; j < ncpus; j++, tip++) {
386                 if (pthread_join(tip->thread, (void *) &ret))
387                         perror("thread_join");
388                 close_thread(tip);
389         }
390 }
391
392 static void stop_all_threads(void)
393 {
394         struct device_information *dip;
395         int i;
396
397         for (dip = device_information, i = 0; i < ndevs; i++, dip++)
398                 stop_threads(dip);
399 }
400
401 static void stop_all_tracing(void)
402 {
403         struct device_information *dip;
404         struct thread_information *tip;
405         int i, j;
406
407         for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
408                 for (tip = dip->threads, j = 0; j < ncpus; j++, tip++)
409                         close_thread(tip);
410                 stop_trace(dip);
411         }
412 }
413
/*
 * Fatal-error exit: release descriptors and stop every trace via
 * stop_all_tracing(), then end the process with the given status.
 */
static void exit_trace(int status)
{
	stop_all_tracing();

	exit(status);
}
419
420 static int resize_devices(char *path)
421 {
422         int size = (ndevs + 1) * sizeof(struct device_information);
423
424         device_information = realloc(device_information, size);
425         if (!device_information) {
426                 fprintf(stderr, "Out of memory, device %s (%d)\n", path, size);
427                 return 1;
428         }
429         device_information[ndevs].path = path;
430         ndevs++;
431         return 0;
432 }
433
434 static int open_devices(void)
435 {
436         struct device_information *dip;
437         int i;
438
439         for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
440                 dip->fd = open(dip->path, O_RDONLY);
441                 if (dip->fd < 0) {
442                         perror(dip->path);
443                         return 1;
444                 }
445         }
446         return 0;
447 }
448
449 static int start_devices(void)
450 {
451         struct device_information *dip;
452         int i, j, size;
453
454         size = ncpus * sizeof(struct thread_information);
455         thread_information = malloc(size * ndevs);
456         if (!thread_information) {
457                 fprintf(stderr, "Out of memory, threads (%d)\n", size * ndevs);
458                 return 1;
459         }
460
461         for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
462                 if (start_trace(dip)) {
463                         close(dip->fd);
464                         fprintf(stderr, "Failed to start trace on %s\n",
465                                 dip->path);
466                         break;
467                 }
468         }
469         if (i != ndevs) {
470                 for (dip = device_information, j = 0; j < i; j++, dip++)
471                         stop_trace(dip);
472                 return 1;
473         }
474
475         for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
476                 dip->threads = thread_information + (i * ncpus);
477                 if (start_threads(dip)) {
478                         fprintf(stderr, "Failed to start worker threads\n");
479                         break;
480                 }
481         }
482         if (i != ndevs) {
483                 for (dip = device_information, j = 0; j < i; j++, dip++)
484                         stop_threads(dip);
485                 for (dip = device_information, i = 0; i < ndevs; i++, dip++)
486                         stop_trace(dip);
487                 return 1;
488         }
489
490         return 0;
491 }
492
493 static void show_stats(void)
494 {
495         int i, j;
496         struct device_information *dip;
497         struct thread_information *tip;
498         unsigned long long events_processed;
499   
500         if (output_name && !strcmp(output_name, "-"))
501                 return;
502
503         for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
504                 printf("Device: %s\n", dip->path);
505                 events_processed = 0;
506                 for (tip = dip->threads, j = 0; j < ncpus; j++, tip++) {
507                         printf("  CPU%3d: %20ld events\n",
508                                tip->cpu, tip->events_processed);
509                         events_processed += tip->events_processed;
510                 }
511                 printf("  Total:  %20lld events\n", events_processed);
512         }
513 }
514   
/* Print the one-line usage summary for program to stderr. */
static void show_usage(char *program)
{
	fprintf(stderr,
		"Usage: %s [-d <dev>] [-a <trace> [-a <trace>]] <dev>\n",
		program);
}
521
522 static void handle_sigint(int sig)
523 {
524         done = 1;
525 }
526
527 int main(int argc, char *argv[])
528 {
529         static char default_relay_path[] = "/relay";
530         struct stat st;
531         int i, c;
532         int act_mask_tmp = 0;
533
534         while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) >= 0) {
535                 switch (c) {
536                 case 'a':
537                         i = find_mask_map(optarg);
538                         if (i < 0) {
539                                 fprintf(stderr,"Invalid action mask %s\n", 
540                                         optarg);
541                                 return 1;
542                         }
543                         act_mask_tmp |= i;
544                         break;
545
546                 case 'A':
547                         if ((sscanf(optarg, "%x", &i) != 1) || !VALID_SET(i)) {
548                                 fprintf(stderr,
549                                         "Invalid set action mask %s/0x%x\n", 
550                                         optarg, i);
551                                 return 1;
552                         }
553                         act_mask_tmp = i;
554                         break;
555
556                 case 'd':
557                         if (resize_devices(optarg) != 0)
558                                 return 1;
559                         break;
560
561                 case 'r':
562                         relay_path = optarg;
563                         break;
564
565                 case 'o':
566                         output_name = optarg;
567                         break;
568                 case 'k':
569                         kill_running_trace = 1;
570                         break;
571
572                 default:
573                         show_usage(argv[0]);
574                         return 1;
575                 }
576         }
577
578         while (optind < argc) {
579                 if (resize_devices(argv[optind++]) != 0)
580                         return 1;
581         }
582
583         if (ndevs == 0) {
584                 show_usage(argv[0]);
585                 return 1;
586         }
587
588         if (!relay_path)
589                 relay_path = default_relay_path;
590
591         if (act_mask_tmp != 0)
592                 act_mask = act_mask_tmp;
593
594         if (stat(relay_path, &st) < 0) {
595                 fprintf(stderr,"%s does not appear to be mounted\n",
596                         relay_path);
597                 return 1;
598         }
599
600         if (open_devices() != 0)
601                 return 1;
602
603         if (kill_running_trace) {
604                 stop_all_traces();
605                 return 0;
606         }
607
608         setlocale(LC_NUMERIC, "en_US");
609
610         ncpus = sysconf(_SC_NPROCESSORS_ONLN);
611         if (ncpus < 0) {
612                 fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed\n");
613                 return 1;
614         }
615
616         if (start_devices() != 0)
617                 return 1;
618
619         signal(SIGINT, handle_sigint);
620         signal(SIGHUP, handle_sigint);
621         signal(SIGTERM, handle_sigint);
622
623         atexit(stop_all_tracing);
624
625         while (!is_done())
626                 sleep(1);
627
628         stop_all_threads();
629         stop_all_traces();
630         show_stats();
631
632         return 0;
633 }
634