[PATCH] blktrace: add ringbuffer in front of kernel buffers
[blktrace.git] / blktrace.c
CommitLineData
d0ca268b
JA
1/*
2 * block queue tracing application
3 *
d956a2cd
JA
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *
d0ca268b
JA
20 */
21#include <pthread.h>
22#include <sys/types.h>
23#include <sys/stat.h>
24#include <unistd.h>
25#include <locale.h>
26#include <signal.h>
27#include <fcntl.h>
28#include <string.h>
29#include <sys/ioctl.h>
b9d4294e 30#include <sys/param.h>
e3e74029 31#include <sys/statfs.h>
d0ca268b
JA
32#include <stdio.h>
33#include <stdlib.h>
34#include <sched.h>
d39c04ca
AB
35#include <ctype.h>
36#include <getopt.h>
da39451f 37#include <errno.h>
a3e4d330 38#include <assert.h>
d0ca268b
JA
39
40#include "blktrace.h"
41
/* version string printed by -V and in the usage text */
static char blktrace_version[] = "0.99";

/*
 * Default sub buffer size and count handed to the kernel. You may want
 * to increase these even more, if you are logging at a high rate and
 * see skipped/missed events
 */
#define BUF_SIZE	(512 * 1024)
#define BUF_NR		(4)

/* size of the stdio buffer attached to each per-cpu output file */
#define OFILE_BUF	(128 * 1024)

/* value the statfs f_type of the relay path is compared against */
#define RELAYFS_TYPE	0xF0B4A981

/*
 * user space ring sizing, as multiples of (buf_size * buf_nr)
 */
#define RING_INIT_NR	(2)
#define RING_MAX_NR	(16)

/* short option string handed to getopt_long() */
#define S_OPTS	"d:a:A:r:o:kw:Vb:n:D:"
/*
 * long option table for getopt_long(); every entry maps onto the short
 * option of the same meaning. Fields are { name, has_arg, flag, val }.
 */
static struct option l_opts[] = {
	{ "dev",		required_argument,	NULL, 'd' },
	{ "act-mask",		required_argument,	NULL, 'a' },
	{ "set-mask",		required_argument,	NULL, 'A' },
	{ "relay",		required_argument,	NULL, 'r' },
	{ "output",		required_argument,	NULL, 'o' },
	{ "kill",		no_argument,		NULL, 'k' },
	{ "stopwatch",		required_argument,	NULL, 'w' },
	{ "version",		no_argument,		NULL, 'V' },
	{ "buffer-size",	required_argument,	NULL, 'b' },
	{ "num-sub-buffers",	required_argument,	NULL, 'n' },
	{ "output-dir",		required_argument,	NULL, 'D' },
	/* terminator */
	{ NULL,			0,			NULL, 0 }
};
130
d0ca268b
JA
/*
 * state of one reader thread; there is one of these per cpu and device
 */
struct thread_information {
	int cpu;			/* cpu index, also selects the trace%d file */
	pthread_t thread;		/* handle filled in by pthread_create() */

	int fd;				/* descriptor of the per-cpu relay file */
	void *fd_buf;			/* ring buffer filled from fd */
	unsigned long fd_off;		/* offset of the oldest unread byte */
	unsigned long fd_size;		/* number of unread bytes in the ring */
	unsigned long fd_max_size;	/* current capacity of the ring */
	char fn[MAXPATHLEN + 64];	/* path of the relay file, for messages */

	pthread_mutex_t *fd_lock;	/* output lock, NULL if output isn't shared */
	FILE *ofile;			/* stream the events are written to */
	char *ofile_buffer;		/* buffer handed to setvbuf() for ofile */

	volatile int closed;		/* set once close_thread() has run */

	unsigned long events_processed;	/* events written so far */
	struct device_information *device;	/* device this thread reads for */
};
151
e7c9f3ff
NS
/*
 * state of one traced block device
 */
struct device_information {
	int fd;				/* descriptor of the opened device node */
	char *path;			/* device path as given by the user */
	char buts_name[32];		/* name copied from the trace setup */
	volatile int trace_started;	/* non-zero while tracing is active */
	struct thread_information *threads;	/* this device's per-cpu threads */
};
d0ca268b 159
e7c9f3ff 160static int ncpus;
d0ca268b 161static struct thread_information *thread_information;
e7c9f3ff
NS
162static int ndevs;
163static struct device_information *device_information;
164
165/* command line option globals */
166static char *relay_path;
d5396421 167static char *output_name;
d1d7f15f 168static char *output_dir;
5c86134e 169static int act_mask = ~0U;
bc39777c 170static int kill_running_trace;
e820abd7
JA
171static unsigned int buf_size = BUF_SIZE;
172static unsigned int buf_nr = BUF_NR;
d39c04ca 173
e7c9f3ff
NS
174#define is_done() (*(volatile int *)(&done))
175static volatile int done;
176
a3e4d330
JA
177#define stopped_and_shown() (*(volatile int *)(&stopped_and_shown))
178static volatile int stopped_and_shown;
179
d5396421
JA
180static pthread_mutex_t stdout_mutex = PTHREAD_MUTEX_INITIALIZER;
181
72ca8801
NS
182static void exit_trace(int status);
183
007c233c
JA
/*
 * accessors for the per-thread 'closed' flag; the read goes through a
 * volatile lvalue
 */
#define tip_closed(tip)		(*(volatile int *)(&(tip)->closed))
#define set_tip_closed(tip)	((tip)->closed = 1)

/*
 * accessors for the per-device 'trace_started' flag
 */
#define dip_tracing(dip)	(*(volatile int *)(&(dip)->trace_started))
#define dip_set_tracing(dip, v)	((dip)->trace_started = (v))

/*
 * walk the first __e devices, __d is the cursor and __i the index
 */
#define __for_each_dip(__d, __i, __e)	\
	for (__i = 0, __d = device_information; __i < __e; __i++, __d++)

/*
 * walk all devices, and all threads of device __d
 */
#define for_each_dip(__d, __i)	__for_each_dip(__d, __i, ndevs)
#define for_each_tip(__d, __t, __i)	\
	for (__i = 0, __t = (__d)->threads; __i < ncpus; __i++, __t++)
196
e7c9f3ff 197static int start_trace(struct device_information *dip)
d0ca268b
JA
198{
199 struct blk_user_trace_setup buts;
200
1f79c4a0 201 memset(&buts, 0, sizeof(buts));
129aa440
JA
202 buts.buf_size = buf_size;
203 buts.buf_nr = buf_nr;
d39c04ca 204 buts.act_mask = act_mask;
d0ca268b 205
e7c9f3ff 206 if (ioctl(dip->fd, BLKSTARTTRACE, &buts) < 0) {
d0ca268b
JA
207 perror("BLKSTARTTRACE");
208 return 1;
209 }
210
e7c9f3ff 211 memcpy(dip->buts_name, buts.name, sizeof(dip->buts_name));
99c1f5ab 212 dip_set_tracing(dip, 1);
d0ca268b
JA
213 return 0;
214}
215
e7c9f3ff 216static void stop_trace(struct device_information *dip)
d0ca268b 217{
99c1f5ab
JA
218 if (dip_tracing(dip) || kill_running_trace) {
219 dip_set_tracing(dip, 0);
cf9208ea 220
e7c9f3ff 221 if (ioctl(dip->fd, BLKSTOPTRACE) < 0)
707b0914 222 perror("BLKSTOPTRACE");
cf9208ea 223
e7c9f3ff 224 close(dip->fd);
cf9208ea 225 dip->fd = -1;
707b0914 226 }
d0ca268b
JA
227}
228
e7c9f3ff
NS
229static void stop_all_traces(void)
230{
231 struct device_information *dip;
232 int i;
233
99c1f5ab 234 for_each_dip(dip, i)
e7c9f3ff
NS
235 stop_trace(dip);
236}
237
a3e4d330 238static int __read_data(struct thread_information *tip, void *buf, int len)
d0ca268b 239{
8a43bac5 240 char *p = buf;
a3e4d330 241 int ret, bytes_done = 0;
d0ca268b 242
a3e4d330
JA
243 while (!is_done() && bytes_done < len) {
244 ret = read(tip->fd, p, len - bytes_done);
245 if (ret == (len - bytes_done))
246 return len;
8a43bac5
JA
247
248 if (ret < 0) {
a3e4d330
JA
249 if (errno == EAGAIN) {
250 if (bytes_done)
251 break;
252 usleep(1000);
253 } else {
254 perror(tip->fn);
255 fprintf(stderr,"Thread %d failed read of %s\n",
256 tip->cpu, tip->fn);
257 break;
258 }
8a43bac5 259 } else if (ret > 0) {
d0ca268b 260 p += ret;
a3e4d330
JA
261 bytes_done += ret;
262 } else if (bytes_done)
263 break;
264 else
db6fe5bc 265 usleep(1000);
8a43bac5
JA
266 }
267
a3e4d330
JA
268 if (bytes_done)
269 return bytes_done;
270
3752a433 271 return -1;
8a43bac5
JA
272}
273
a3e4d330
JA
274static int resize_ringbuffer(struct thread_information *tip)
275{
276 if (tip->fd_max_size >= RING_MAX_NR * buf_size * buf_nr)
277 return 1;
278
279 tip->fd_buf = realloc(tip->fd_buf, 2 * tip->fd_max_size);
280 tip->fd_max_size <<= 1;
281 return 0;
282}
283
284static int __refill_ringbuffer(struct thread_information *tip, unsigned int len)
285{
286 unsigned long off;
287 int ret;
288
289 if (len + tip->fd_size > tip->fd_max_size)
290 if (resize_ringbuffer(tip))
291 return 1;
292
293 off = (tip->fd_size + tip->fd_off) & (tip->fd_max_size - 1);
294 if (off + len > tip->fd_max_size)
295 len = tip->fd_max_size - off;
296
297 assert(len > 0);
298
299 ret = __read_data(tip, tip->fd_buf + off, len);
300 if (ret < 0)
301 return -1;
302
303 tip->fd_size += ret;
304 if (ret == len)
305 return 0;
306
307 return 1;
308}
309
310/*
311 * keep filling ring until we get a short read
312 */
313static void refill_ringbuffer(struct thread_information *tip, unsigned int len)
314{
315 int ret;
316
317 if (is_done())
318 return;
319
320 do {
321 ret = __refill_ringbuffer(tip, len);
322 } while (ret == len);
323}
324
325static int read_data(struct thread_information *tip, void *buf, int len)
326{
327 unsigned int start_size, end_size;
328
329 /*
330 * if our ringbuffer is less than 50% full, fill it as much as we can
331 */
332 if (!tip->fd_size || (tip->fd_max_size / tip->fd_size) >= 2)
333 refill_ringbuffer(tip, buf_size);
334
335 if (len > tip->fd_size) {
336 assert(is_done());
337 return -1;
338 }
339
340 /*
341 * see if we wrap the ring
342 */
343 start_size = len;
344 end_size = 0;
345 if (len > (tip->fd_max_size - tip->fd_off)) {
346 start_size = tip->fd_max_size - tip->fd_off;
347 end_size = len - start_size;
348 }
349
350 memcpy(buf, tip->fd_buf + tip->fd_off, start_size);
351 if (end_size)
352 memcpy(buf + start_size, tip->fd_buf, end_size);
353
354 tip->fd_off = (tip->fd_off + len) & (tip->fd_max_size - 1);
355 tip->fd_size -= len;
356 return 0;
357}
358
/*
 * Write 'buf_len' bytes from 'buf' to 'file' as one item.
 *
 * fwrite() reports the number of complete items written, so here it
 * returns either 1 or 0 and never a negative value. A result other
 * than 1 is a write error: it is reported and the call fails rather
 * than retrying the same write forever.
 *
 * Returns 0 on success (including a zero length write), 1 on error.
 */
static int write_data(FILE *file, void *buf, unsigned int buf_len)
{
	if (!buf_len)
		return 0;

	if (fwrite(buf, buf_len, 1, file) != 1) {
		perror("write");
		return 1;
	}

	return 0;
}
378
/*
 * Allocate a buffer of 'nb' bytes and fill it with the next 'nb' bytes
 * of trace data.
 *
 * Returns the buffer, which the caller must free, or NULL if the
 * allocation or the read failed.
 */
static void *extract_data(struct thread_information *tip, int nb)
{
	unsigned char *buf;

	buf = malloc(nb);
	if (!buf) {
		/* read_data() would copy into a NULL pointer */
		fprintf(stderr, "Out of memory, payload (%d)\n", nb);
		return NULL;
	}

	if (!read_data(tip, buf, nb))
		return buf;

	free(buf);
	return NULL;
}
390
3a9d6c13
JA
391/*
392 * trace may start inside 'bit' or may need to be gotten further on
393 */
394static int get_event_slow(struct thread_information *tip,
395 struct blk_io_trace *bit)
4b5db44a 396{
3a9d6c13
JA
397 const int inc = sizeof(__u32);
398 struct blk_io_trace foo;
fb39f32f 399 unsigned int offset;
3a9d6c13
JA
400 void *p;
401
402 /*
a3e4d330 403 * check if trace is inside
3a9d6c13
JA
404 */
405 offset = 0;
406 p = bit;
407 while (offset < sizeof(*bit)) {
408 p += inc;
409 offset += inc;
410
411 memcpy(&foo, p, inc);
412
413 if (CHECK_MAGIC(&foo))
414 break;
415 }
4b5db44a 416
3a9d6c13
JA
417 /*
418 * part trace found inside, read the rest
419 */
420 if (offset < sizeof(*bit)) {
421 int good_bytes = sizeof(*bit) - offset;
422
423 memmove(bit, p, good_bytes);
424 p = (void *) bit + good_bytes;
425
426 return read_data(tip, p, offset);
427 }
428
429 /*
430 * nothing found, keep looking for start of trace
431 */
4b5db44a
JA
432 do {
433 if (read_data(tip, bit, sizeof(bit->magic)))
434 return -1;
4b5db44a
JA
435 } while (!CHECK_MAGIC(bit));
436
3a9d6c13
JA
437 /*
438 * now get the rest of it
439 */
440 p = &bit->sequence;
a3e4d330 441 if (read_data(tip, p, sizeof(*bit) - inc))
3a9d6c13
JA
442 return -1;
443
444 return 0;
445}
446
447/*
448 * Sometimes relayfs screws us a little, if an event crosses a sub buffer
449 * boundary. So keep looking forward in the trace data until an event
450 * is found
451 */
452static int get_event(struct thread_information *tip, struct blk_io_trace *bit)
453{
454 /*
455 * optimize for the common fast case, a full trace read that
456 * succeeds
457 */
458 if (read_data(tip, bit, sizeof(*bit)))
459 return -1;
460
461 if (CHECK_MAGIC(bit))
4b5db44a
JA
462 return 0;
463
3a9d6c13
JA
464 /*
465 * ok that didn't work, the event may start somewhere inside the
466 * trace itself
467 */
468 return get_event_slow(tip, bit);
4b5db44a
JA
469}
470
d5396421
JA
471static inline void tip_fd_unlock(struct thread_information *tip)
472{
473 if (tip->fd_lock)
474 pthread_mutex_unlock(tip->fd_lock);
475}
476
477static inline void tip_fd_lock(struct thread_information *tip)
478{
479 if (tip->fd_lock)
480 pthread_mutex_lock(tip->fd_lock);
481}
482
3aabcd89 483static void *extract(void *arg)
d0ca268b
JA
484{
485 struct thread_information *tip = arg;
db6fe5bc 486 int pdu_len;
e820abd7 487 char *pdu_data;
d0ca268b
JA
488 struct blk_io_trace t;
489 pid_t pid = getpid();
490 cpu_set_t cpu_mask;
491
492 CPU_ZERO(&cpu_mask);
b9d4294e 493 CPU_SET((tip->cpu), &cpu_mask);
d0ca268b
JA
494
495 if (sched_setaffinity(pid, sizeof(cpu_mask), &cpu_mask) == -1) {
496 perror("sched_setaffinity");
76718bcd 497 exit_trace(1);
d0ca268b
JA
498 }
499
e7c9f3ff
NS
500 snprintf(tip->fn, sizeof(tip->fn), "%s/block/%s/trace%d",
501 relay_path, tip->device->buts_name, tip->cpu);
a3e4d330 502 tip->fd = open(tip->fn, O_RDONLY | O_NONBLOCK);
b9d4294e
JA
503 if (tip->fd < 0) {
504 perror(tip->fn);
5c86134e
JA
505 fprintf(stderr,"Thread %d failed open of %s\n", tip->cpu,
506 tip->fn);
76718bcd 507 exit_trace(1);
d0ca268b
JA
508 }
509
a3e4d330
JA
510 /*
511 * start with a ringbuffer that is twice the size of the kernel side
512 */
513 tip->fd_max_size = buf_size * buf_nr * RING_INIT_NR;
514 tip->fd_buf = malloc(tip->fd_max_size);
515 tip->fd_off = 0;
516 tip->fd_size = 0;
517
69e65a9e 518 pdu_data = NULL;
a3e4d330 519 while (1) {
4b5db44a 520 if (get_event(tip, &t))
8a43bac5 521 break;
d0ca268b
JA
522
523 if (verify_trace(&t))
db6fe5bc 524 break;
d0ca268b 525
18ada3d4
JA
526 pdu_len = t.pdu_len;
527
6fe4709e
JA
528 trace_to_be(&t);
529
db6fe5bc 530 if (pdu_len) {
e820abd7 531 pdu_data = extract_data(tip, pdu_len);
db6fe5bc
JA
532 if (!pdu_data)
533 break;
534 }
69e65a9e
JA
535
536 /*
537 * now we have both trace and payload, get a lock on the
538 * output descriptor and send it off
539 */
d5396421
JA
540 tip_fd_lock(tip);
541
007c233c 542 if (write_data(tip->ofile, &t, sizeof(t))) {
d5396421 543 tip_fd_unlock(tip);
db6fe5bc 544 break;
d0ca268b
JA
545 }
546
007c233c 547 if (pdu_data && write_data(tip->ofile, pdu_data, pdu_len)) {
db6fe5bc
JA
548 tip_fd_unlock(tip);
549 break;
550 }
551
552 tip_fd_unlock(tip);
d5396421 553
db6fe5bc 554 if (pdu_data) {
69e65a9e
JA
555 free(pdu_data);
556 pdu_data = NULL;
557 }
87b72777 558
d0ca268b
JA
559 tip->events_processed++;
560 }
561
db6fe5bc 562 exit_trace(1);
d0ca268b
JA
563 return NULL;
564}
565
007c233c
JA
566static void close_thread(struct thread_information *tip)
567{
568 if (tip_closed(tip))
569 return;
570
571 set_tip_closed(tip);
572
573 if (tip->fd != -1)
574 close(tip->fd);
575 if (tip->ofile)
576 fclose(tip->ofile);
577 if (tip->ofile_buffer)
578 free(tip->ofile_buffer);
579
580 tip->fd = -1;
581 tip->ofile = NULL;
582 tip->ofile_buffer = NULL;
583}
584
e7c9f3ff 585static int start_threads(struct device_information *dip)
d0ca268b
JA
586{
587 struct thread_information *tip;
d5396421 588 char op[64];
e7c9f3ff 589 int j, pipeline = output_name && !strcmp(output_name, "-");
007c233c 590 int len, mode;
d0ca268b 591
99c1f5ab 592 for_each_tip(dip, tip, j) {
e7c9f3ff
NS
593 tip->cpu = j;
594 tip->device = dip;
d5396421 595 tip->fd_lock = NULL;
d0ca268b
JA
596 tip->events_processed = 0;
597
e7c9f3ff 598 if (pipeline) {
007c233c 599 tip->ofile = fdopen(STDOUT_FILENO, "w");
d5396421 600 tip->fd_lock = &stdout_mutex;
007c233c
JA
601 mode = _IOLBF;
602 buf_size = 512;
d5396421 603 } else {
d1d7f15f
JA
604 len = 0;
605
606 if (output_dir)
607 len = sprintf(op, "%s/", output_dir);
608
9f6486bd 609 if (output_name) {
d1d7f15f 610 sprintf(op + len, "%s.blktrace.%d", output_name,
9f6486bd
JA
611 tip->cpu);
612 } else {
d1d7f15f 613 sprintf(op + len, "%s.blktrace.%d",
e7c9f3ff 614 dip->buts_name, tip->cpu);
9f6486bd 615 }
007c233c
JA
616 tip->ofile = fopen(op, "w");
617 mode = _IOFBF;
618 buf_size = OFILE_BUF;
d5396421
JA
619 }
620
007c233c 621 if (tip->ofile == NULL) {
d5396421 622 perror(op);
e7c9f3ff 623 return 1;
d5396421
JA
624 }
625
007c233c
JA
626 tip->ofile_buffer = malloc(buf_size);
627 if (setvbuf(tip->ofile, tip->ofile_buffer, mode, buf_size)) {
628 perror("setvbuf");
629 close_thread(tip);
630 return 1;
631 }
632
d0ca268b 633 if (pthread_create(&tip->thread, NULL, extract, tip)) {
e7c9f3ff 634 perror("pthread_create");
007c233c 635 close_thread(tip);
e7c9f3ff 636 return 1;
d0ca268b
JA
637 }
638 }
639
e7c9f3ff 640 return 0;
d0ca268b
JA
641}
642
e7c9f3ff 643static void stop_threads(struct device_information *dip)
3aabcd89 644{
e7c9f3ff
NS
645 struct thread_information *tip;
646 long ret;
007c233c
JA
647 int i;
648
99c1f5ab 649 for_each_tip(dip, tip, i) {
3aabcd89
JA
650 if (pthread_join(tip->thread, (void *) &ret))
651 perror("thread_join");
007c233c 652
72ca8801 653 close_thread(tip);
3aabcd89
JA
654 }
655}
656
e7c9f3ff 657static void stop_all_threads(void)
72ca8801 658{
e7c9f3ff 659 struct device_information *dip;
72ca8801
NS
660 int i;
661
99c1f5ab 662 for_each_dip(dip, i)
e7c9f3ff
NS
663 stop_threads(dip);
664}
665
666static void stop_all_tracing(void)
667{
668 struct device_information *dip;
669 struct thread_information *tip;
670 int i, j;
671
99c1f5ab
JA
672 for_each_dip(dip, i) {
673 for_each_tip(dip, tip, j)
e7c9f3ff 674 close_thread(tip);
007c233c 675
e7c9f3ff
NS
676 stop_trace(dip);
677 }
72ca8801
NS
678}
679
/*
 * stop all tracing activity and terminate the process with 'status'
 */
static void exit_trace(int status)
{
	stop_all_tracing();

	exit(status);
}
685
e7c9f3ff
NS
686static int resize_devices(char *path)
687{
688 int size = (ndevs + 1) * sizeof(struct device_information);
689
690 device_information = realloc(device_information, size);
691 if (!device_information) {
692 fprintf(stderr, "Out of memory, device %s (%d)\n", path, size);
693 return 1;
694 }
695 device_information[ndevs].path = path;
696 ndevs++;
697 return 0;
698}
699
700static int open_devices(void)
d0ca268b 701{
e7c9f3ff 702 struct device_information *dip;
d0ca268b 703 int i;
d0ca268b 704
99c1f5ab 705 for_each_dip(dip, i) {
cf9208ea 706 dip->fd = open(dip->path, O_RDONLY | O_NONBLOCK);
e7c9f3ff
NS
707 if (dip->fd < 0) {
708 perror(dip->path);
709 return 1;
710 }
711 }
99c1f5ab 712
e7c9f3ff
NS
713 return 0;
714}
715
716static int start_devices(void)
717{
718 struct device_information *dip;
719 int i, j, size;
720
721 size = ncpus * sizeof(struct thread_information);
722 thread_information = malloc(size * ndevs);
723 if (!thread_information) {
724 fprintf(stderr, "Out of memory, threads (%d)\n", size * ndevs);
725 return 1;
726 }
d5396421 727
99c1f5ab 728 for_each_dip(dip, i) {
e7c9f3ff
NS
729 if (start_trace(dip)) {
730 close(dip->fd);
731 fprintf(stderr, "Failed to start trace on %s\n",
732 dip->path);
733 break;
734 }
735 }
99c1f5ab 736
e7c9f3ff 737 if (i != ndevs) {
99c1f5ab 738 __for_each_dip(dip, j, i)
e7c9f3ff 739 stop_trace(dip);
99c1f5ab 740
e7c9f3ff
NS
741 return 1;
742 }
743
99c1f5ab 744 for_each_dip(dip, i) {
e7c9f3ff
NS
745 dip->threads = thread_information + (i * ncpus);
746 if (start_threads(dip)) {
747 fprintf(stderr, "Failed to start worker threads\n");
748 break;
749 }
750 }
99c1f5ab 751
e7c9f3ff 752 if (i != ndevs) {
99c1f5ab 753 __for_each_dip(dip, j, i)
e7c9f3ff 754 stop_threads(dip);
99c1f5ab 755 for_each_dip(dip, i)
e7c9f3ff 756 stop_trace(dip);
99c1f5ab 757
e7c9f3ff 758 return 1;
d0ca268b
JA
759 }
760
e7c9f3ff 761 return 0;
d0ca268b
JA
762}
763
da39451f
TZ
764static int get_dropped_count(const char *buts_name)
765{
766 int fd;
767 char tmp[MAXPATHLEN + 64];
768
769 snprintf(tmp, sizeof(tmp), "%s/block/%s/dropped",
770 relay_path, buts_name);
771
772 fd = open(tmp, O_RDONLY);
773 if (fd < 0) {
774 /*
775 * this may be ok, if the kernel doesn't support dropped counts
776 */
777 if (errno == ENOENT)
778 return 0;
779
780 fprintf(stderr, "Couldn't open dropped file %s\n", tmp);
781 return -1;
782 }
783
784 if (read(fd, tmp, sizeof(tmp)) < 0) {
785 perror(tmp);
786 close(fd);
787 return -1;
788 }
789
790 close(fd);
791
792 return atoi(tmp);
793}
794
e7c9f3ff
NS
795static void show_stats(void)
796{
56070ea4 797 int i, j, dropped, total_drops, no_stdout = 0;
e7c9f3ff
NS
798 struct device_information *dip;
799 struct thread_information *tip;
800 unsigned long long events_processed;
428683db 801
e7c9f3ff 802 if (output_name && !strcmp(output_name, "-"))
56070ea4 803 no_stdout = 1;
e7c9f3ff 804
56070ea4 805 total_drops = 0;
99c1f5ab 806 for_each_dip(dip, i) {
56070ea4
JA
807 if (!no_stdout)
808 printf("Device: %s\n", dip->path);
e7c9f3ff 809 events_processed = 0;
99c1f5ab 810 for_each_tip(dip, tip, j) {
56070ea4
JA
811 if (!no_stdout)
812 printf(" CPU%3d: %20ld events\n",
813 tip->cpu, tip->events_processed);
e7c9f3ff
NS
814 events_processed += tip->events_processed;
815 }
da39451f 816 dropped = get_dropped_count(dip->buts_name);
56070ea4
JA
817 total_drops += dropped;
818 if (!no_stdout)
819 printf(" Total: %20lld events (dropped %d)\n",
820 events_processed, dropped);
e7c9f3ff 821 }
56070ea4
JA
822
823 if (total_drops)
824 fprintf(stderr, "You have dropped events, consider using a larger buffer size (-b)\n");
e7c9f3ff 825}
52724a0e
JA
826
/*
 * option summary printed by show_usage(). the version option is the
 * upper case -V, see S_OPTS and l_opts.
 */
static char usage_str[] = \
	"-d <dev> [ -r relay path ] [ -o <output> ] [-k ] [ -w time ]\n" \
	"[ -a action ] [ -A action mask ] [ -V ]\n\n" \
	"\t-d Use specified device. May also be given last after options\n" \
	"\t-r Path to mounted relayfs, defaults to /relay\n" \
	"\t-o File(s) to send output to\n" \
	"\t-D Directory to prepend to output file names\n" \
	"\t-k Kill a running trace\n" \
	"\t-w Stop after defined time, in seconds\n" \
	"\t-a Only trace specified actions. See documentation\n" \
	"\t-A Give trace mask as a single value. See documentation\n" \
	"\t-b Sub buffer size in KiB\n" \
	"\t-n Number of sub buffers\n" \
	"\t-V Print program version info\n\n";
841
ee1f4158
NS
842static void show_usage(char *program)
843{
52724a0e 844 fprintf(stderr, "Usage: %s %s %s",program, blktrace_version, usage_str);
ee1f4158
NS
845}
846
e820abd7 847static void handle_sigint(__attribute__((__unused__)) int sig)
d0ca268b 848{
d0ca268b 849 done = 1;
8ea62495 850 show_stats();
d0ca268b
JA
851}
852
853int main(int argc, char *argv[])
854{
5270dddd 855 static char default_relay_path[] = "/relay";
e3e74029 856 struct statfs st;
d39c04ca 857 int i, c;
ece238a6 858 int stop_watch = 0;
d39c04ca
AB
859 int act_mask_tmp = 0;
860
861 while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) >= 0) {
862 switch (c) {
863 case 'a':
864 i = find_mask_map(optarg);
865 if (i < 0) {
ab197ca7 866 fprintf(stderr,"Invalid action mask %s\n",
d39c04ca 867 optarg);
7425d456 868 return 1;
d39c04ca
AB
869 }
870 act_mask_tmp |= i;
871 break;
872
873 case 'A':
98f8386b
AB
874 if ((sscanf(optarg, "%x", &i) != 1) ||
875 !valid_act_opt(i)) {
d39c04ca 876 fprintf(stderr,
ab197ca7 877 "Invalid set action mask %s/0x%x\n",
d39c04ca 878 optarg, i);
7425d456 879 return 1;
d39c04ca
AB
880 }
881 act_mask_tmp = i;
882 break;
d0ca268b 883
d39c04ca 884 case 'd':
e7c9f3ff
NS
885 if (resize_devices(optarg) != 0)
886 return 1;
d39c04ca
AB
887 break;
888
5270dddd
JA
889 case 'r':
890 relay_path = optarg;
891 break;
892
d5396421 893 case 'o':
66efebf8 894 output_name = optarg;
d5396421 895 break;
bc39777c
JA
896 case 'k':
897 kill_running_trace = 1;
898 break;
ece238a6
NS
899 case 'w':
900 stop_watch = atoi(optarg);
901 if (stop_watch <= 0) {
902 fprintf(stderr,
903 "Invalid stopwatch value (%d secs)\n",
904 stop_watch);
905 return 1;
906 }
907 break;
57ea8602 908 case 'V':
52724a0e
JA
909 printf("%s version %s\n", argv[0], blktrace_version);
910 return 0;
129aa440
JA
911 case 'b':
912 buf_size = atoi(optarg);
183a0855 913 if (buf_size <= 0 || buf_size > 16*1024) {
129aa440
JA
914 fprintf(stderr,
915 "Invalid buffer size (%d)\n", buf_size);
916 return 1;
917 }
918 buf_size <<= 10;
919 break;
920 case 'n':
921 buf_nr = atoi(optarg);
922 if (buf_nr <= 0) {
923 fprintf(stderr,
924 "Invalid buffer nr (%d)\n", buf_nr);
925 return 1;
926 }
927 break;
d1d7f15f
JA
928 case 'D':
929 output_dir = optarg;
930 break;
d39c04ca 931 default:
ee1f4158 932 show_usage(argv[0]);
7425d456 933 return 1;
d39c04ca
AB
934 }
935 }
936
e7c9f3ff
NS
937 while (optind < argc) {
938 if (resize_devices(argv[optind++]) != 0)
939 return 1;
940 }
ee1f4158 941
e7c9f3ff 942 if (ndevs == 0) {
ee1f4158 943 show_usage(argv[0]);
7425d456 944 return 1;
d39c04ca
AB
945 }
946
5270dddd
JA
947 if (!relay_path)
948 relay_path = default_relay_path;
949
d5396421 950 if (act_mask_tmp != 0)
d39c04ca 951 act_mask = act_mask_tmp;
d0ca268b 952
e3e74029
NS
953 if (statfs(relay_path, &st) < 0) {
954 perror("statfs");
955 fprintf(stderr,"%s does not appear to be a valid path\n",
956 relay_path);
957 return 1;
64acacae 958 } else if (st.f_type != (long) RELAYFS_TYPE) {
e3e74029 959 fprintf(stderr,"%s does not appear to be a relay filesystem\n",
d0ca268b 960 relay_path);
7425d456 961 return 1;
d0ca268b
JA
962 }
963
e7c9f3ff 964 if (open_devices() != 0)
7425d456 965 return 1;
bc39777c
JA
966
967 if (kill_running_trace) {
e7c9f3ff 968 stop_all_traces();
7425d456 969 return 0;
bc39777c
JA
970 }
971
d0ca268b
JA
972 setlocale(LC_NUMERIC, "en_US");
973
e7c9f3ff
NS
974 ncpus = sysconf(_SC_NPROCESSORS_ONLN);
975 if (ncpus < 0) {
976 fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed\n");
7425d456 977 return 1;
d0ca268b
JA
978 }
979
e7c9f3ff
NS
980 if (start_devices() != 0)
981 return 1;
982
d0ca268b
JA
983 signal(SIGINT, handle_sigint);
984 signal(SIGHUP, handle_sigint);
985 signal(SIGTERM, handle_sigint);
ece238a6 986 signal(SIGALRM, handle_sigint);
d0ca268b 987
e7c9f3ff 988 atexit(stop_all_tracing);
830fd65c 989
ece238a6
NS
990 if (stop_watch)
991 alarm(stop_watch);
992
d0ca268b
JA
993 while (!is_done())
994 sleep(1);
995
e7c9f3ff
NS
996 stop_all_threads();
997 stop_all_traces();
d0ca268b
JA
998 show_stats();
999
1000 return 0;
1001}
1002