[PATCH] kernel: update kernel patch with the HT sibling time offset fix
[blktrace.git] / blktrace.c
CommitLineData
d0ca268b
JA
1/*
2 * block queue tracing application
3 *
d956a2cd
JA
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *
d0ca268b
JA
20 */
21#include <pthread.h>
22#include <sys/types.h>
23#include <sys/stat.h>
24#include <unistd.h>
25#include <locale.h>
26#include <signal.h>
27#include <fcntl.h>
28#include <string.h>
29#include <sys/ioctl.h>
b9d4294e 30#include <sys/param.h>
e3e74029 31#include <sys/statfs.h>
eb3c8108 32#include <sys/poll.h>
d0ca268b
JA
33#include <stdio.h>
34#include <stdlib.h>
35#include <sched.h>
d39c04ca
AB
36#include <ctype.h>
37#include <getopt.h>
da39451f 38#include <errno.h>
a3e4d330 39#include <assert.h>
d0ca268b
JA
40
41#include "blktrace.h"
42
/* Version string printed by -V and in the usage banner. */
static char blktrace_version[] = "0.99";

/*
 * Default size in bytes of one sub buffer. You may want to increase this
 * even more (see -b) if you are logging at a high rate and see
 * skipped/missed events.
 */
#define BUF_SIZE	(512 * 1024)
/* Default number of sub buffers (see -n). */
#define BUF_NR		(4)

/* Size in bytes of the stdio buffer attached to each per-CPU output file. */
#define OFILE_BUF	(128 * 1024)

/* f_type that statfs() must report for the relay mount point. */
#define RELAYFS_TYPE	0xF0B4A981

/*
 * The user-space ring starts at RING_INIT_NR times buf_size * buf_nr and
 * stops growing once it reaches RING_MAX_NR times that amount.
 */
#define RING_INIT_NR	(2)
#define RING_MAX_NR	(16UL)

/* Short option string handed to getopt_long() together with l_opts. */
#define S_OPTS	"d:a:A:r:o:kw:Vb:n:D:"
/*
 * Long option table for getopt_long(). Fields are, in order:
 * name, has_arg, flag, val. 'val' is the short option letter that the
 * option switch in main() receives. The table ends with an all-zero entry.
 */
static struct option l_opts[] = {
	{ "dev",		required_argument,	NULL,	'd' },
	{ "act-mask",		required_argument,	NULL,	'a' },
	{ "set-mask",		required_argument,	NULL,	'A' },
	{ "relay",		required_argument,	NULL,	'r' },
	{ "output",		required_argument,	NULL,	'o' },
	{ "kill",		no_argument,		NULL,	'k' },
	{ "stopwatch",		required_argument,	NULL,	'w' },
	{ "version",		no_argument,		NULL,	'V' },
	{ "buffer-size",	required_argument,	NULL,	'b' },
	{ "num-sub-buffers",	required_argument,	NULL,	'n' },
	{ "output-dir",		required_argument,	NULL,	'D' },
	{ NULL,			0,			NULL,	0 }
};
131
d0ca268b
JA
/*
 * State of one reader thread. There is one of these per CPU for every
 * traced device.
 */
struct thread_information {
	int cpu;			/* CPU index this thread serves */
	pthread_t thread;		/* handle used to join the thread */

	int fd;				/* descriptor of the per-CPU trace file */

	/* user-space ring buffer holding data read from 'fd' */
	void *fd_buf;			/* ring storage */
	unsigned long fd_off;		/* offset of the oldest unread byte */
	unsigned long fd_size;		/* number of unread bytes in the ring */
	unsigned long fd_max_size;	/* current capacity of the ring */

	char fn[MAXPATHLEN + 64];	/* path of the per-CPU trace file */

	pthread_mutex_t *fd_lock;	/* output lock, NULL if none is needed */
	FILE *ofile;			/* stream the trace is written to */
	char *ofile_buffer;		/* buffer handed to setvbuf() for ofile */
	int ofile_flush;		/* non-zero: fflush() after every write */

	unsigned long events_processed;	/* events written so far */
	struct device_information *device;	/* device this thread belongs to */
};
151
e7c9f3ff
NS
/* State of one traced block device. */
struct device_information {
	int fd;				/* descriptor of the opened device node */
	char *path;			/* device path as given on the command line */
	char buts_name[32];		/* name copied back from BLKSTARTTRACE */
	volatile int trace_started;	/* non-zero while the trace is active */
	unsigned long drop_count;	/* dropped events read at stop time */
	struct thread_information *threads;	/* this device's per-CPU threads */
};
d0ca268b 160
e7c9f3ff 161static int ncpus;
d0ca268b 162static struct thread_information *thread_information;
e7c9f3ff
NS
163static int ndevs;
164static struct device_information *device_information;
165
166/* command line option globals */
167static char *relay_path;
d5396421 168static char *output_name;
d1d7f15f 169static char *output_dir;
5c86134e 170static int act_mask = ~0U;
bc39777c 171static int kill_running_trace;
eb3c8108
JA
172static unsigned long buf_size = BUF_SIZE;
173static unsigned long buf_nr = BUF_NR;
d39c04ca 174
e7c9f3ff
NS
175#define is_done() (*(volatile int *)(&done))
176static volatile int done;
177
eb3c8108
JA
178#define is_trace_stopped() (*(volatile int *)(&trace_stopped))
179static volatile int trace_stopped;
180
181#define is_stat_shown() (*(volatile int *)(&stat_shown))
182static volatile int stat_shown;
a3e4d330 183
d5396421
JA
184static pthread_mutex_t stdout_mutex = PTHREAD_MUTEX_INITIALIZER;
185
72ca8801
NS
186static void exit_trace(int status);
187
99c1f5ab
JA
188#define dip_tracing(dip) (*(volatile int *)(&(dip)->trace_started))
189#define dip_set_tracing(dip, v) ((dip)->trace_started = (v))
190
191#define __for_each_dip(__d, __i, __e) \
192 for (__i = 0, __d = device_information; __i < __e; __i++, __d++)
193
194#define for_each_dip(__d, __i) __for_each_dip(__d, __i, ndevs)
195#define for_each_tip(__d, __t, __i) \
196 for (__i = 0, __t = (__d)->threads; __i < ncpus; __i++, __t++)
197
eb3c8108
JA
198static int get_dropped_count(const char *buts_name)
199{
200 int fd;
201 char tmp[MAXPATHLEN + 64];
202
203 snprintf(tmp, sizeof(tmp), "%s/block/%s/dropped",
204 relay_path, buts_name);
205
206 fd = open(tmp, O_RDONLY);
207 if (fd < 0) {
208 /*
209 * this may be ok, if the kernel doesn't support dropped counts
210 */
211 if (errno == ENOENT)
212 return 0;
213
214 fprintf(stderr, "Couldn't open dropped file %s\n", tmp);
215 return -1;
216 }
217
218 if (read(fd, tmp, sizeof(tmp)) < 0) {
219 perror(tmp);
220 close(fd);
221 return -1;
222 }
223
224 close(fd);
225
226 return atoi(tmp);
227}
228
e7c9f3ff 229static int start_trace(struct device_information *dip)
d0ca268b
JA
230{
231 struct blk_user_trace_setup buts;
232
1f79c4a0 233 memset(&buts, 0, sizeof(buts));
129aa440
JA
234 buts.buf_size = buf_size;
235 buts.buf_nr = buf_nr;
d39c04ca 236 buts.act_mask = act_mask;
d0ca268b 237
e7c9f3ff 238 if (ioctl(dip->fd, BLKSTARTTRACE, &buts) < 0) {
d0ca268b
JA
239 perror("BLKSTARTTRACE");
240 return 1;
241 }
242
e7c9f3ff 243 memcpy(dip->buts_name, buts.name, sizeof(dip->buts_name));
99c1f5ab 244 dip_set_tracing(dip, 1);
d0ca268b
JA
245 return 0;
246}
247
e7c9f3ff 248static void stop_trace(struct device_information *dip)
d0ca268b 249{
99c1f5ab
JA
250 if (dip_tracing(dip) || kill_running_trace) {
251 dip_set_tracing(dip, 0);
cf9208ea 252
e7c9f3ff 253 if (ioctl(dip->fd, BLKSTOPTRACE) < 0)
707b0914 254 perror("BLKSTOPTRACE");
cf9208ea 255
e7c9f3ff 256 close(dip->fd);
cf9208ea 257 dip->fd = -1;
707b0914 258 }
d0ca268b
JA
259}
260
e7c9f3ff
NS
261static void stop_all_traces(void)
262{
263 struct device_information *dip;
264 int i;
265
eb3c8108
JA
266 for_each_dip(dip, i) {
267 dip->drop_count = get_dropped_count(dip->buts_name);
e7c9f3ff 268 stop_trace(dip);
eb3c8108 269 }
e7c9f3ff
NS
270}
271
eb3c8108
JA
272static void wait_for_data(struct thread_information *tip)
273{
274 struct pollfd pfd = { .fd = tip->fd, .events = POLLIN };
275
ae9f71b3 276 poll(&pfd, 1, 10);
eb3c8108
JA
277}
278
279static int __read_data(struct thread_information *tip, void *buf, int len,
280 int block)
d0ca268b 281{
ae9f71b3 282 int ret = 0;
bbabf03a 283
ae9f71b3 284 while (!is_done()) {
bbabf03a 285 ret = read(tip->fd, buf, len);
bbabf03a
JA
286 if (ret > 0)
287 break;
288 else if (!ret) {
289 if (!block)
290 break;
ae9f71b3 291
bbabf03a 292 wait_for_data(tip);
bbabf03a
JA
293 } else {
294 if (errno != EAGAIN) {
a3e4d330
JA
295 perror(tip->fn);
296 fprintf(stderr,"Thread %d failed read of %s\n",
297 tip->cpu, tip->fn);
298 break;
299 }
bbabf03a
JA
300 if (!block) {
301 ret = 0;
302 break;
303 }
304
eb3c8108 305 wait_for_data(tip);
bbabf03a 306 }
8a43bac5
JA
307 }
308
bbabf03a 309 return ret;
8a43bac5
JA
310}
311
eb3c8108
JA
312#define can_grow_ring(tip) ((tip)->fd_max_size < RING_MAX_NR * buf_size * buf_nr)
313
a3e4d330
JA
314static int resize_ringbuffer(struct thread_information *tip)
315{
eb3c8108 316 if (!can_grow_ring(tip))
a3e4d330
JA
317 return 1;
318
319 tip->fd_buf = realloc(tip->fd_buf, 2 * tip->fd_max_size);
eb3c8108
JA
320
321 /*
322 * if the ring currently wraps, copy range over
323 */
324 if (tip->fd_off + tip->fd_size > tip->fd_max_size) {
325 unsigned long wrap_size = tip->fd_size - (tip->fd_max_size - tip->fd_off);
0685312f 326 memmove(tip->fd_buf + tip->fd_max_size, tip->fd_buf, wrap_size);
eb3c8108
JA
327 }
328
a3e4d330
JA
329 tip->fd_max_size <<= 1;
330 return 0;
331}
332
3b8164f9 333static int __refill_ringbuffer(struct thread_information *tip, int len,
eb3c8108 334 int block)
a3e4d330
JA
335{
336 unsigned long off;
337 int ret;
338
a3e4d330
JA
339 off = (tip->fd_size + tip->fd_off) & (tip->fd_max_size - 1);
340 if (off + len > tip->fd_max_size)
341 len = tip->fd_max_size - off;
342
343 assert(len > 0);
344
eb3c8108 345 ret = __read_data(tip, tip->fd_buf + off, len, block);
a3e4d330
JA
346 if (ret < 0)
347 return -1;
348
349 tip->fd_size += ret;
e2369c59 350 return ret;
a3e4d330
JA
351}
352
353/*
354 * keep filling ring until we get a short read
355 */
eb3c8108 356static void refill_ringbuffer(struct thread_information *tip, int block)
a3e4d330 357{
eb3c8108 358 int len = buf_size;
a3e4d330
JA
359 int ret;
360
a3e4d330 361 do {
1c99bc21
JA
362 if (len + tip->fd_size > tip->fd_max_size)
363 resize_ringbuffer(tip);
364
eb3c8108 365 ret = __refill_ringbuffer(tip, len, block);
be4a60c3 366 } while ((ret == len) && !is_done());
a3e4d330
JA
367}
368
3b8164f9
JA
369static int read_data(struct thread_information *tip, void *buf,
370 unsigned int len)
a3e4d330
JA
371{
372 unsigned int start_size, end_size;
373
eb3c8108 374 refill_ringbuffer(tip, len > tip->fd_size);
a3e4d330 375
eb3c8108 376 if (len > tip->fd_size)
a3e4d330 377 return -1;
a3e4d330
JA
378
379 /*
380 * see if we wrap the ring
381 */
382 start_size = len;
383 end_size = 0;
384 if (len > (tip->fd_max_size - tip->fd_off)) {
385 start_size = tip->fd_max_size - tip->fd_off;
386 end_size = len - start_size;
387 }
388
389 memcpy(buf, tip->fd_buf + tip->fd_off, start_size);
390 if (end_size)
391 memcpy(buf + start_size, tip->fd_buf, end_size);
392
393 tip->fd_off = (tip->fd_off + len) & (tip->fd_max_size - 1);
394 tip->fd_size -= len;
395 return 0;
396}
397
7126171a
JA
398static int write_data(struct thread_information *tip,
399 void *buf, unsigned int buf_len)
8a43bac5 400{
7126171a 401 int ret;
8a43bac5 402
7126171a
JA
403 while (1) {
404 ret = fwrite(buf, buf_len, 1, tip->ofile);
007c233c 405 if (ret == 1)
8a43bac5
JA
406 break;
407
db6fe5bc
JA
408 if (ret < 0) {
409 perror("write");
410 return 1;
8a43bac5 411 }
d0ca268b
JA
412 }
413
7126171a
JA
414 if (tip->ofile_flush)
415 fflush(tip->ofile);
416
8a43bac5
JA
417 return 0;
418}
419
/*
 * Allocate a buffer of 'nb' bytes and fill it from the thread's ring.
 *
 * Returns the buffer, which the caller must free(), or NULL if memory
 * could not be allocated or the data could not be read.
 */
static void *extract_data(struct thread_information *tip, int nb)
{
	unsigned char *buf;

	buf = malloc(nb);
	if (!buf) {
		/* read_data() would otherwise copy into a NULL pointer */
		fprintf(stderr, "Out of memory, payload (%d)\n", nb);
		return NULL;
	}

	if (!read_data(tip, buf, nb))
		return buf;

	free(buf);
	return NULL;
}
431
3a9d6c13
JA
432/*
433 * trace may start inside 'bit' or may need to be gotten further on
434 */
435static int get_event_slow(struct thread_information *tip,
436 struct blk_io_trace *bit)
4b5db44a 437{
3a9d6c13
JA
438 const int inc = sizeof(__u32);
439 struct blk_io_trace foo;
fb39f32f 440 unsigned int offset;
3a9d6c13
JA
441 void *p;
442
443 /*
a3e4d330 444 * check if trace is inside
3a9d6c13
JA
445 */
446 offset = 0;
447 p = bit;
448 while (offset < sizeof(*bit)) {
449 p += inc;
450 offset += inc;
451
452 memcpy(&foo, p, inc);
453
454 if (CHECK_MAGIC(&foo))
455 break;
456 }
4b5db44a 457
3a9d6c13
JA
458 /*
459 * part trace found inside, read the rest
460 */
461 if (offset < sizeof(*bit)) {
462 int good_bytes = sizeof(*bit) - offset;
463
464 memmove(bit, p, good_bytes);
465 p = (void *) bit + good_bytes;
466
467 return read_data(tip, p, offset);
468 }
469
470 /*
471 * nothing found, keep looking for start of trace
472 */
4b5db44a
JA
473 do {
474 if (read_data(tip, bit, sizeof(bit->magic)))
475 return -1;
4b5db44a
JA
476 } while (!CHECK_MAGIC(bit));
477
3a9d6c13
JA
478 /*
479 * now get the rest of it
480 */
481 p = &bit->sequence;
a3e4d330 482 if (read_data(tip, p, sizeof(*bit) - inc))
3a9d6c13
JA
483 return -1;
484
485 return 0;
486}
487
488/*
489 * Sometimes relayfs screws us a little, if an event crosses a sub buffer
490 * boundary. So keep looking forward in the trace data until an event
491 * is found
492 */
493static int get_event(struct thread_information *tip, struct blk_io_trace *bit)
494{
495 /*
496 * optimize for the common fast case, a full trace read that
497 * succeeds
498 */
499 if (read_data(tip, bit, sizeof(*bit)))
500 return -1;
501
502 if (CHECK_MAGIC(bit))
4b5db44a
JA
503 return 0;
504
3a9d6c13
JA
505 /*
506 * ok that didn't work, the event may start somewhere inside the
507 * trace itself
508 */
509 return get_event_slow(tip, bit);
4b5db44a
JA
510}
511
d5396421
JA
512static inline void tip_fd_unlock(struct thread_information *tip)
513{
514 if (tip->fd_lock)
515 pthread_mutex_unlock(tip->fd_lock);
516}
517
518static inline void tip_fd_lock(struct thread_information *tip)
519{
520 if (tip->fd_lock)
521 pthread_mutex_lock(tip->fd_lock);
522}
523
91816d54
JA
524static void close_thread(struct thread_information *tip)
525{
91816d54
JA
526 if (tip->fd != -1)
527 close(tip->fd);
528 if (tip->ofile)
529 fclose(tip->ofile);
530 if (tip->ofile_buffer)
531 free(tip->ofile_buffer);
532 if (tip->fd_buf)
533 free(tip->fd_buf);
534
535 tip->fd = -1;
536 tip->ofile = NULL;
537 tip->ofile_buffer = NULL;
538 tip->fd_buf = NULL;
539}
540
3aabcd89 541static void *extract(void *arg)
d0ca268b
JA
542{
543 struct thread_information *tip = arg;
db6fe5bc 544 int pdu_len;
e820abd7 545 char *pdu_data;
d0ca268b
JA
546 struct blk_io_trace t;
547 pid_t pid = getpid();
548 cpu_set_t cpu_mask;
549
550 CPU_ZERO(&cpu_mask);
b9d4294e 551 CPU_SET((tip->cpu), &cpu_mask);
d0ca268b
JA
552
553 if (sched_setaffinity(pid, sizeof(cpu_mask), &cpu_mask) == -1) {
554 perror("sched_setaffinity");
76718bcd 555 exit_trace(1);
d0ca268b
JA
556 }
557
e7c9f3ff
NS
558 snprintf(tip->fn, sizeof(tip->fn), "%s/block/%s/trace%d",
559 relay_path, tip->device->buts_name, tip->cpu);
a3e4d330 560 tip->fd = open(tip->fn, O_RDONLY | O_NONBLOCK);
b9d4294e
JA
561 if (tip->fd < 0) {
562 perror(tip->fn);
5c86134e
JA
563 fprintf(stderr,"Thread %d failed open of %s\n", tip->cpu,
564 tip->fn);
76718bcd 565 exit_trace(1);
d0ca268b
JA
566 }
567
a3e4d330
JA
568 /*
569 * start with a ringbuffer that is twice the size of the kernel side
570 */
571 tip->fd_max_size = buf_size * buf_nr * RING_INIT_NR;
572 tip->fd_buf = malloc(tip->fd_max_size);
573 tip->fd_off = 0;
574 tip->fd_size = 0;
575
69e65a9e 576 pdu_data = NULL;
a3e4d330 577 while (1) {
4b5db44a 578 if (get_event(tip, &t))
8a43bac5 579 break;
d0ca268b
JA
580
581 if (verify_trace(&t))
db6fe5bc 582 break;
d0ca268b 583
18ada3d4
JA
584 pdu_len = t.pdu_len;
585
6fe4709e
JA
586 trace_to_be(&t);
587
db6fe5bc 588 if (pdu_len) {
e820abd7 589 pdu_data = extract_data(tip, pdu_len);
db6fe5bc
JA
590 if (!pdu_data)
591 break;
592 }
69e65a9e
JA
593
594 /*
595 * now we have both trace and payload, get a lock on the
596 * output descriptor and send it off
597 */
d5396421
JA
598 tip_fd_lock(tip);
599
7126171a 600 if (write_data(tip, &t, sizeof(t))) {
d5396421 601 tip_fd_unlock(tip);
db6fe5bc 602 break;
d0ca268b
JA
603 }
604
7126171a 605 if (pdu_data && write_data(tip, pdu_data, pdu_len)) {
db6fe5bc
JA
606 tip_fd_unlock(tip);
607 break;
608 }
609
610 tip_fd_unlock(tip);
d5396421 611
db6fe5bc 612 if (pdu_data) {
69e65a9e
JA
613 free(pdu_data);
614 pdu_data = NULL;
615 }
87b72777 616
d0ca268b
JA
617 tip->events_processed++;
618 }
619
91816d54 620 close_thread(tip);
d0ca268b
JA
621 return NULL;
622}
623
e7c9f3ff 624static int start_threads(struct device_information *dip)
d0ca268b
JA
625{
626 struct thread_information *tip;
d5396421 627 char op[64];
e7c9f3ff 628 int j, pipeline = output_name && !strcmp(output_name, "-");
57e8a2ad 629 int len, mode, vbuf_size;
d0ca268b 630
99c1f5ab 631 for_each_tip(dip, tip, j) {
e7c9f3ff
NS
632 tip->cpu = j;
633 tip->device = dip;
d5396421 634 tip->fd_lock = NULL;
d0ca268b
JA
635 tip->events_processed = 0;
636
e7c9f3ff 637 if (pipeline) {
007c233c 638 tip->ofile = fdopen(STDOUT_FILENO, "w");
d5396421 639 tip->fd_lock = &stdout_mutex;
7126171a 640 tip->ofile_flush = 1;
007c233c 641 mode = _IOLBF;
57e8a2ad 642 vbuf_size = 512;
d5396421 643 } else {
d1d7f15f
JA
644 len = 0;
645
646 if (output_dir)
647 len = sprintf(op, "%s/", output_dir);
648
9f6486bd 649 if (output_name) {
d1d7f15f 650 sprintf(op + len, "%s.blktrace.%d", output_name,
9f6486bd
JA
651 tip->cpu);
652 } else {
d1d7f15f 653 sprintf(op + len, "%s.blktrace.%d",
e7c9f3ff 654 dip->buts_name, tip->cpu);
9f6486bd 655 }
007c233c 656 tip->ofile = fopen(op, "w");
7126171a 657 tip->ofile_flush = 0;
007c233c 658 mode = _IOFBF;
57e8a2ad 659 vbuf_size = OFILE_BUF;
d5396421
JA
660 }
661
007c233c 662 if (tip->ofile == NULL) {
d5396421 663 perror(op);
e7c9f3ff 664 return 1;
d5396421
JA
665 }
666
57e8a2ad
TZ
667 tip->ofile_buffer = malloc(vbuf_size);
668 if (setvbuf(tip->ofile, tip->ofile_buffer, mode, vbuf_size)) {
007c233c
JA
669 perror("setvbuf");
670 close_thread(tip);
671 return 1;
672 }
673
d0ca268b 674 if (pthread_create(&tip->thread, NULL, extract, tip)) {
e7c9f3ff 675 perror("pthread_create");
007c233c 676 close_thread(tip);
e7c9f3ff 677 return 1;
d0ca268b
JA
678 }
679 }
680
e7c9f3ff 681 return 0;
d0ca268b
JA
682}
683
e7c9f3ff 684static void stop_threads(struct device_information *dip)
3aabcd89 685{
e7c9f3ff 686 struct thread_information *tip;
91816d54 687 unsigned long ret;
007c233c
JA
688 int i;
689
91816d54
JA
690 for_each_tip(dip, tip, i)
691 (void) pthread_join(tip->thread, (void *) &ret);
3aabcd89
JA
692}
693
e7c9f3ff 694static void stop_all_threads(void)
72ca8801 695{
e7c9f3ff 696 struct device_information *dip;
72ca8801
NS
697 int i;
698
99c1f5ab 699 for_each_dip(dip, i)
e7c9f3ff
NS
700 stop_threads(dip);
701}
702
703static void stop_all_tracing(void)
704{
705 struct device_information *dip;
91816d54 706 int i;
007c233c 707
91816d54 708 for_each_dip(dip, i)
e7c9f3ff 709 stop_trace(dip);
72ca8801
NS
710}
711
712static void exit_trace(int status)
713{
eb3c8108
JA
714 if (!is_trace_stopped()) {
715 trace_stopped = 1;
716 stop_all_threads();
717 stop_all_tracing();
718 }
719
72ca8801
NS
720 exit(status);
721}
722
e7c9f3ff
NS
723static int resize_devices(char *path)
724{
725 int size = (ndevs + 1) * sizeof(struct device_information);
726
727 device_information = realloc(device_information, size);
728 if (!device_information) {
729 fprintf(stderr, "Out of memory, device %s (%d)\n", path, size);
730 return 1;
731 }
732 device_information[ndevs].path = path;
733 ndevs++;
734 return 0;
735}
736
737static int open_devices(void)
d0ca268b 738{
e7c9f3ff 739 struct device_information *dip;
d0ca268b 740 int i;
d0ca268b 741
99c1f5ab 742 for_each_dip(dip, i) {
cf9208ea 743 dip->fd = open(dip->path, O_RDONLY | O_NONBLOCK);
e7c9f3ff
NS
744 if (dip->fd < 0) {
745 perror(dip->path);
746 return 1;
747 }
748 }
99c1f5ab 749
e7c9f3ff
NS
750 return 0;
751}
752
753static int start_devices(void)
754{
755 struct device_information *dip;
756 int i, j, size;
757
758 size = ncpus * sizeof(struct thread_information);
759 thread_information = malloc(size * ndevs);
760 if (!thread_information) {
761 fprintf(stderr, "Out of memory, threads (%d)\n", size * ndevs);
762 return 1;
763 }
d5396421 764
99c1f5ab 765 for_each_dip(dip, i) {
e7c9f3ff
NS
766 if (start_trace(dip)) {
767 close(dip->fd);
768 fprintf(stderr, "Failed to start trace on %s\n",
769 dip->path);
770 break;
771 }
772 }
99c1f5ab 773
e7c9f3ff 774 if (i != ndevs) {
99c1f5ab 775 __for_each_dip(dip, j, i)
e7c9f3ff 776 stop_trace(dip);
99c1f5ab 777
e7c9f3ff
NS
778 return 1;
779 }
780
99c1f5ab 781 for_each_dip(dip, i) {
e7c9f3ff
NS
782 dip->threads = thread_information + (i * ncpus);
783 if (start_threads(dip)) {
784 fprintf(stderr, "Failed to start worker threads\n");
785 break;
786 }
787 }
99c1f5ab 788
e7c9f3ff 789 if (i != ndevs) {
99c1f5ab 790 __for_each_dip(dip, j, i)
e7c9f3ff 791 stop_threads(dip);
99c1f5ab 792 for_each_dip(dip, i)
e7c9f3ff 793 stop_trace(dip);
99c1f5ab 794
e7c9f3ff 795 return 1;
d0ca268b
JA
796 }
797
e7c9f3ff 798 return 0;
d0ca268b
JA
799}
800
e7c9f3ff
NS
801static void show_stats(void)
802{
eb3c8108 803 int i, j, no_stdout = 0;
e7c9f3ff
NS
804 struct device_information *dip;
805 struct thread_information *tip;
806 unsigned long long events_processed;
eb3c8108
JA
807 unsigned long total_drops;
808
809 if (is_stat_shown())
810 return;
811
812 stat_shown = 1;
428683db 813
e7c9f3ff 814 if (output_name && !strcmp(output_name, "-"))
56070ea4 815 no_stdout = 1;
e7c9f3ff 816
56070ea4 817 total_drops = 0;
99c1f5ab 818 for_each_dip(dip, i) {
56070ea4
JA
819 if (!no_stdout)
820 printf("Device: %s\n", dip->path);
e7c9f3ff 821 events_processed = 0;
99c1f5ab 822 for_each_tip(dip, tip, j) {
56070ea4
JA
823 if (!no_stdout)
824 printf(" CPU%3d: %20ld events\n",
825 tip->cpu, tip->events_processed);
e7c9f3ff
NS
826 events_processed += tip->events_processed;
827 }
eb3c8108 828 total_drops += dip->drop_count;
56070ea4 829 if (!no_stdout)
eb3c8108
JA
830 printf(" Total: %20lld events (dropped %lu)\n",
831 events_processed, dip->drop_count);
e7c9f3ff 832 }
56070ea4
JA
833
834 if (total_drops)
835 fprintf(stderr, "You have dropped events, consider using a larger buffer size (-b)\n");
e7c9f3ff 836}
52724a0e
JA
837
/*
 * Option summary printed by show_usage(). The version option is spelled
 * -V, matching the short option string the program parses.
 */
static char usage_str[] = \
	"-d <dev> [ -r relay path ] [ -o <output> ] [-k ] [ -w time ]\n" \
	"[ -a action ] [ -A action mask ] [ -V ]\n\n" \
	"\t-d Use specified device. May also be given last after options\n" \
	"\t-r Path to mounted relayfs, defaults to /relay\n" \
	"\t-o File(s) to send output to\n" \
	"\t-D Directory to prepend to output file names\n" \
	"\t-k Kill a running trace\n" \
	"\t-w Stop after defined time, in seconds\n" \
	"\t-a Only trace specified actions. See documentation\n" \
	"\t-A Give trace mask as a single value. See documentation\n" \
	"\t-b Sub buffer size in KiB\n" \
	"\t-n Number of sub buffers\n" \
	"\t-V Print program version info\n\n";
852
ee1f4158
NS
853static void show_usage(char *program)
854{
52724a0e 855 fprintf(stderr, "Usage: %s %s %s",program, blktrace_version, usage_str);
ee1f4158 856}
e820abd7 857static void handle_sigint(__attribute__((__unused__)) int sig)
d0ca268b 858{
d0ca268b 859 done = 1;
eb3c8108
JA
860 if (!is_trace_stopped()) {
861 trace_stopped = 1;
862 stop_all_threads();
863 stop_all_traces();
864 }
865
8ea62495 866 show_stats();
d0ca268b
JA
867}
868
869int main(int argc, char *argv[])
870{
5270dddd 871 static char default_relay_path[] = "/relay";
e3e74029 872 struct statfs st;
d39c04ca 873 int i, c;
ece238a6 874 int stop_watch = 0;
d39c04ca
AB
875 int act_mask_tmp = 0;
876
877 while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) >= 0) {
878 switch (c) {
879 case 'a':
880 i = find_mask_map(optarg);
881 if (i < 0) {
ab197ca7 882 fprintf(stderr,"Invalid action mask %s\n",
d39c04ca 883 optarg);
7425d456 884 return 1;
d39c04ca
AB
885 }
886 act_mask_tmp |= i;
887 break;
888
889 case 'A':
98f8386b
AB
890 if ((sscanf(optarg, "%x", &i) != 1) ||
891 !valid_act_opt(i)) {
d39c04ca 892 fprintf(stderr,
ab197ca7 893 "Invalid set action mask %s/0x%x\n",
d39c04ca 894 optarg, i);
7425d456 895 return 1;
d39c04ca
AB
896 }
897 act_mask_tmp = i;
898 break;
d0ca268b 899
d39c04ca 900 case 'd':
e7c9f3ff
NS
901 if (resize_devices(optarg) != 0)
902 return 1;
d39c04ca
AB
903 break;
904
5270dddd
JA
905 case 'r':
906 relay_path = optarg;
907 break;
908
d5396421 909 case 'o':
66efebf8 910 output_name = optarg;
d5396421 911 break;
bc39777c
JA
912 case 'k':
913 kill_running_trace = 1;
914 break;
ece238a6
NS
915 case 'w':
916 stop_watch = atoi(optarg);
917 if (stop_watch <= 0) {
918 fprintf(stderr,
919 "Invalid stopwatch value (%d secs)\n",
920 stop_watch);
921 return 1;
922 }
923 break;
57ea8602 924 case 'V':
52724a0e
JA
925 printf("%s version %s\n", argv[0], blktrace_version);
926 return 0;
129aa440 927 case 'b':
eb3c8108 928 buf_size = strtoul(optarg, NULL, 10);
183a0855 929 if (buf_size <= 0 || buf_size > 16*1024) {
129aa440 930 fprintf(stderr,
eb3c8108 931 "Invalid buffer size (%lu)\n",buf_size);
129aa440
JA
932 return 1;
933 }
934 buf_size <<= 10;
935 break;
936 case 'n':
eb3c8108 937 buf_nr = strtoul(optarg, NULL, 10);
129aa440
JA
938 if (buf_nr <= 0) {
939 fprintf(stderr,
eb3c8108 940 "Invalid buffer nr (%lu)\n", buf_nr);
129aa440
JA
941 return 1;
942 }
943 break;
d1d7f15f
JA
944 case 'D':
945 output_dir = optarg;
946 break;
d39c04ca 947 default:
ee1f4158 948 show_usage(argv[0]);
7425d456 949 return 1;
d39c04ca
AB
950 }
951 }
952
e7c9f3ff
NS
953 while (optind < argc) {
954 if (resize_devices(argv[optind++]) != 0)
955 return 1;
956 }
ee1f4158 957
e7c9f3ff 958 if (ndevs == 0) {
ee1f4158 959 show_usage(argv[0]);
7425d456 960 return 1;
d39c04ca
AB
961 }
962
5270dddd
JA
963 if (!relay_path)
964 relay_path = default_relay_path;
965
d5396421 966 if (act_mask_tmp != 0)
d39c04ca 967 act_mask = act_mask_tmp;
d0ca268b 968
e3e74029
NS
969 if (statfs(relay_path, &st) < 0) {
970 perror("statfs");
971 fprintf(stderr,"%s does not appear to be a valid path\n",
972 relay_path);
973 return 1;
64acacae 974 } else if (st.f_type != (long) RELAYFS_TYPE) {
e3e74029 975 fprintf(stderr,"%s does not appear to be a relay filesystem\n",
d0ca268b 976 relay_path);
7425d456 977 return 1;
d0ca268b
JA
978 }
979
e7c9f3ff 980 if (open_devices() != 0)
7425d456 981 return 1;
bc39777c
JA
982
983 if (kill_running_trace) {
e7c9f3ff 984 stop_all_traces();
7425d456 985 return 0;
bc39777c
JA
986 }
987
d0ca268b
JA
988 setlocale(LC_NUMERIC, "en_US");
989
e7c9f3ff
NS
990 ncpus = sysconf(_SC_NPROCESSORS_ONLN);
991 if (ncpus < 0) {
992 fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed\n");
7425d456 993 return 1;
d0ca268b
JA
994 }
995
e7c9f3ff
NS
996 if (start_devices() != 0)
997 return 1;
998
d0ca268b
JA
999 signal(SIGINT, handle_sigint);
1000 signal(SIGHUP, handle_sigint);
1001 signal(SIGTERM, handle_sigint);
ece238a6 1002 signal(SIGALRM, handle_sigint);
d0ca268b 1003
e7c9f3ff 1004 atexit(stop_all_tracing);
830fd65c 1005
ece238a6
NS
1006 if (stop_watch)
1007 alarm(stop_watch);
1008
d0ca268b
JA
1009 while (!is_done())
1010 sleep(1);
1011
eb3c8108
JA
1012 if (!is_trace_stopped()) {
1013 trace_stopped = 1;
91816d54
JA
1014 stop_all_threads();
1015 stop_all_traces();
91816d54 1016 }
d0ca268b 1017
eb3c8108
JA
1018 show_stats();
1019
d0ca268b
JA
1020 return 0;
1021}
1022