[PATCH] kernel: update relayfs to set POLLMSG on subbuffer switch
[blktrace.git] / blktrace.c
CommitLineData
d0ca268b
JA
1/*
2 * block queue tracing application
3 *
d956a2cd
JA
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *
d0ca268b
JA
20 */
21#include <pthread.h>
22#include <sys/types.h>
23#include <sys/stat.h>
24#include <unistd.h>
25#include <locale.h>
26#include <signal.h>
27#include <fcntl.h>
28#include <string.h>
29#include <sys/ioctl.h>
b9d4294e 30#include <sys/param.h>
e3e74029 31#include <sys/statfs.h>
eb3c8108 32#include <sys/poll.h>
b7106311 33#include <sys/mman.h>
8e86c98a 34#include <sys/socket.h>
d0ca268b
JA
35#include <stdio.h>
36#include <stdlib.h>
37#include <sched.h>
d39c04ca
AB
38#include <ctype.h>
39#include <getopt.h>
da39451f 40#include <errno.h>
8e86c98a
JA
41#include <netinet/in.h>
42#include <arpa/inet.h>
43#include <netdb.h>
32f18c48 44#include <sys/sendfile.h>
d0ca268b
JA
45
46#include "blktrace.h"
21f55651 47#include "barrier.h"
d0ca268b 48
13d928f0 49static char blktrace_version[] = "0.99";
52724a0e 50
8f551a39
JA
/*
 * Default relay sub-buffer geometry. You may want to increase BUF_SIZE
 * even more, if you are logging at a high rate and see skipped/missed
 * events.
 */
#define BUF_SIZE	(512 * 1024)
#define BUF_NR		(4)

/* size of the stdio buffer attached to per-cpu output files */
#define OFILE_BUF	(128 * 1024)

#define RELAYFS_TYPE	0xF0B4A981

/* short option string; keep in sync with l_opts[] */
#define S_OPTS	"d:a:A:r:o:kw:Vb:n:D:lh:p:s"
/*
 * Long option table. Every entry returns the short option character
 * listed in S_OPTS, so both spellings share one handler.
 */
static struct option l_opts[] = {
	{ .name = "dev",		.has_arg = required_argument,	.flag = NULL,	.val = 'd' },
	{ .name = "act-mask",		.has_arg = required_argument,	.flag = NULL,	.val = 'a' },
	{ .name = "set-mask",		.has_arg = required_argument,	.flag = NULL,	.val = 'A' },
	{ .name = "relay",		.has_arg = required_argument,	.flag = NULL,	.val = 'r' },
	{ .name = "output",		.has_arg = required_argument,	.flag = NULL,	.val = 'o' },
	{ .name = "kill",		.has_arg = no_argument,		.flag = NULL,	.val = 'k' },
	{ .name = "stopwatch",		.has_arg = required_argument,	.flag = NULL,	.val = 'w' },
	{ .name = "version",		.has_arg = no_argument,		.flag = NULL,	.val = 'V' },
	{ .name = "buffer-size",	.has_arg = required_argument,	.flag = NULL,	.val = 'b' },
	{ .name = "num-sub-buffers",	.has_arg = required_argument,	.flag = NULL,	.val = 'n' },
	{ .name = "output-dir",		.has_arg = required_argument,	.flag = NULL,	.val = 'D' },
	{ .name = "listen",		.has_arg = no_argument,		.flag = NULL,	.val = 'l' },
	{ .name = "host",		.has_arg = required_argument,	.flag = NULL,	.val = 'h' },
	{ .name = "port",		.has_arg = required_argument,	.flag = NULL,	.val = 'p' },
	{ .name = "sendfile",		.has_arg = no_argument,		.flag = NULL,	.val = 's' },
	/* terminator */
	{ .name = NULL, }
};
158
/*
 * One chunk of trace data passed from a reader thread to the writer
 * through the per-thread fifo.
 */
struct tip_subbuf {
	void *buf;		/* payload; NULL for entries queued by get_subbuf_sendfile() */
	unsigned int len;	/* number of valid bytes */
	unsigned int max_len;	/* capacity recorded for buf */
};
164
21f55651
JA
#define FIFO_SIZE	(1024)	/* should be plenty big! */
#define CL_SIZE		(128)	/* cache line, any bigger? */

/*
 * Ring of sub-buffer pointers. Indices wrap with a FIFO_SIZE - 1 mask;
 * tail is advanced by subbuf_fifo_queue(), head by subbuf_fifo_dequeue().
 * Each index is aligned to CL_SIZE.
 */
struct tip_subbuf_fifo {
	int tail __attribute__((aligned(CL_SIZE)));
	int head __attribute__((aligned(CL_SIZE)));
	struct tip_subbuf *q[FIFO_SIZE];
};
173
d0ca268b
JA
174struct thread_information {
175 int cpu;
176 pthread_t thread;
b9d4294e
JA
177
178 int fd;
a3e4d330 179 void *fd_buf;
b9d4294e
JA
180 char fn[MAXPATHLEN + 64];
181
e3bf54d8
JA
182 struct in_addr cl_in_addr;
183
007c233c
JA
184 FILE *ofile;
185 char *ofile_buffer;
32f18c48 186 off_t ofile_offset;
9db17354 187 int ofile_stdout;
8e86c98a 188 int ofile_mmap;
007c233c 189
0cc7d25e
JA
190 int (*get_subbuf)(struct thread_information *, unsigned int);
191 int (*flush_subbuf)(struct thread_information *, struct tip_subbuf *);
192 int (*read_data)(struct thread_information *, void *, unsigned int);
193
d0ca268b 194 unsigned long events_processed;
b7106311 195 unsigned long long data_read;
bcbeb60f 196 unsigned long long data_queued;
e7c9f3ff 197 struct device_information *device;
9db17354
JA
198
199 int exited;
200
b7106311
JA
201 /*
202 * piped fifo buffers
203 */
21f55651 204 struct tip_subbuf_fifo fifo;
7de86b12 205 struct tip_subbuf *leftover_ts;
b7106311
JA
206
207 /*
208 * mmap controlled output files
209 */
210 unsigned long long fs_size;
211 unsigned long long fs_max_size;
212 unsigned long fs_off;
213 void *fs_buf;
214 unsigned long fs_buf_len;
d0ca268b
JA
215};
216
e7c9f3ff
NS
/*
 * Per traced block device state.
 */
struct device_information {
	int fd;				/* device node, -1 when closed */
	char *path;
	char buts_name[32];		/* trace name copied from BLKTRACESETUP */
	volatile int trace_started;
	unsigned long drop_count;
	struct thread_information *threads;	/* ncpus entries */
};
d0ca268b 225
e7c9f3ff 226static int ncpus;
d0ca268b 227static struct thread_information *thread_information;
e7c9f3ff
NS
228static int ndevs;
229static struct device_information *device_information;
230
231/* command line option globals */
232static char *relay_path;
d5396421 233static char *output_name;
d1d7f15f 234static char *output_dir;
5c86134e 235static int act_mask = ~0U;
bc39777c 236static int kill_running_trace;
eb3c8108
JA
237static unsigned long buf_size = BUF_SIZE;
238static unsigned long buf_nr = BUF_NR;
b7106311 239static unsigned int page_size;
d39c04ca 240
e7c9f3ff
NS
/*
 * Run-state flags. The accessor macros read each flag through a
 * volatile-qualified pointer.
 */
static volatile int done;
#define is_done()		(*(volatile int *)(&done))

static volatile int trace_stopped;
#define is_trace_stopped()	(*(volatile int *)(&trace_stopped))

static volatile int stat_shown;
#define is_stat_shown()		(*(volatile int *)(&stat_shown))
a3e4d330 249
8e86c98a
JA
250int data_is_native = -1;
251
72ca8801
NS
252static void exit_trace(int status);
253
99c1f5ab
JA
/* read / update a device's trace_started flag */
#define dip_tracing(dip)	(*(volatile int *)(&(dip)->trace_started))
#define dip_set_tracing(dip, v)	((dip)->trace_started = (v))

/* walk the first __e entries of device_information[] */
#define __for_each_dip(__d, __i, __e)	\
	for (__i = 0, __d = device_information; __i < __e; __i++, __d++)

/* walk every known device */
#define for_each_dip(__d, __i)	__for_each_dip(__d, __i, ndevs)

/* walk the ncpus thread slots of device __d */
#define for_each_tip(__d, __t, __j)	\
	for (__j = 0, __t = (__d)->threads; __j < ncpus; __j++, __t++)
99c1f5ab 263
8e86c98a
JA
264/*
265 * networking stuff follows. we include a magic number so we know whether
266 * to endianness convert or not
267 */
268struct blktrace_net_hdr {
269 u32 magic; /* same as trace magic */
22cd0c02 270 char buts_name[32]; /* trace name */
8e86c98a 271 u32 cpu; /* for which cpu */
22cd0c02 272 u32 max_cpus;
8e86c98a
JA
273 u32 len; /* length of following trace data */
274};
275
#define TRACE_NET_PORT	(8462)

/* values for net_mode */
enum {
	Net_none = 0,
	Net_server,
	Net_client,
};

/*
 * network cmd line params
 */
static char hostname[MAXHOSTNAMELEN];
static int net_port = TRACE_NET_PORT;
static int net_mode = 0;
static int net_use_sendfile;

/* sockets used for receiving / sending trace data, -1 when unset */
static int net_in_fd = -1;
static int net_out_fd = -1;
294
295static void handle_sigint(__attribute__((__unused__)) int sig)
296{
7035d92d
JA
297 struct device_information *dip;
298 int i;
299
300 /*
301 * stop trace so we can reap currently produced data
302 */
303 for_each_dip(dip, i) {
921b05fe
JA
304 if (dip->fd == -1)
305 continue;
7035d92d
JA
306 if (ioctl(dip->fd, BLKTRACESTOP) < 0)
307 perror("BLKTRACESTOP");
308 }
309
8e86c98a
JA
310 done = 1;
311}
312
eb3c8108
JA
313static int get_dropped_count(const char *buts_name)
314{
315 int fd;
316 char tmp[MAXPATHLEN + 64];
317
318 snprintf(tmp, sizeof(tmp), "%s/block/%s/dropped",
319 relay_path, buts_name);
320
321 fd = open(tmp, O_RDONLY);
322 if (fd < 0) {
323 /*
324 * this may be ok, if the kernel doesn't support dropped counts
325 */
326 if (errno == ENOENT)
327 return 0;
328
329 fprintf(stderr, "Couldn't open dropped file %s\n", tmp);
330 return -1;
331 }
332
333 if (read(fd, tmp, sizeof(tmp)) < 0) {
334 perror(tmp);
335 close(fd);
336 return -1;
337 }
338
339 close(fd);
340
341 return atoi(tmp);
342}
343
e7c9f3ff 344static int start_trace(struct device_information *dip)
d0ca268b
JA
345{
346 struct blk_user_trace_setup buts;
347
1f79c4a0 348 memset(&buts, 0, sizeof(buts));
129aa440
JA
349 buts.buf_size = buf_size;
350 buts.buf_nr = buf_nr;
d39c04ca 351 buts.act_mask = act_mask;
d0ca268b 352
ed71a31e
JA
353 if (ioctl(dip->fd, BLKTRACESETUP, &buts) < 0) {
354 perror("BLKTRACESETUP");
355 return 1;
356 }
357
358 if (ioctl(dip->fd, BLKTRACESTART) < 0) {
359 perror("BLKTRACESTART");
d0ca268b
JA
360 return 1;
361 }
362
e7c9f3ff 363 memcpy(dip->buts_name, buts.name, sizeof(dip->buts_name));
99c1f5ab 364 dip_set_tracing(dip, 1);
d0ca268b
JA
365 return 0;
366}
367
e7c9f3ff 368static void stop_trace(struct device_information *dip)
d0ca268b 369{
99c1f5ab
JA
370 if (dip_tracing(dip) || kill_running_trace) {
371 dip_set_tracing(dip, 0);
cf9208ea 372
7035d92d
JA
373 /*
374 * should be stopped, just don't complain if it isn't
375 */
376 ioctl(dip->fd, BLKTRACESTOP);
377
ed71a31e
JA
378 if (ioctl(dip->fd, BLKTRACETEARDOWN) < 0)
379 perror("BLKTRACETEARDOWN");
cf9208ea 380
e7c9f3ff 381 close(dip->fd);
cf9208ea 382 dip->fd = -1;
707b0914 383 }
d0ca268b
JA
384}
385
e7c9f3ff
NS
386static void stop_all_traces(void)
387{
388 struct device_information *dip;
389 int i;
390
eb3c8108
JA
391 for_each_dip(dip, i) {
392 dip->drop_count = get_dropped_count(dip->buts_name);
e7c9f3ff 393 stop_trace(dip);
eb3c8108 394 }
e7c9f3ff
NS
395}
396
7934e668 397static void wait_for_data(struct thread_information *tip, int events)
eb3c8108 398{
7934e668 399 struct pollfd pfd = { .fd = tip->fd, .events = events };
eb3c8108 400
9db17354 401 do {
7934e668
JA
402 if (poll(&pfd, 1, 100) < 0) {
403 perror("poll");
404 break;
405 }
406 if (pfd.revents & events)
9db17354
JA
407 break;
408 if (tip->ofile_stdout)
409 break;
410 } while (!is_done());
eb3c8108
JA
411}
412
0cc7d25e
JA
413static int read_data_file(struct thread_information *tip, void *buf,
414 unsigned int len)
d0ca268b 415{
ae9f71b3 416 int ret = 0;
bbabf03a 417
9db17354 418 do {
7934e668 419 wait_for_data(tip, POLLIN);
ae9f71b3 420
9db17354
JA
421 ret = read(tip->fd, buf, len);
422 if (!ret)
423 continue;
424 else if (ret > 0)
425 return ret;
426 else {
bbabf03a 427 if (errno != EAGAIN) {
a3e4d330
JA
428 perror(tip->fn);
429 fprintf(stderr,"Thread %d failed read of %s\n",
430 tip->cpu, tip->fn);
431 break;
432 }
9db17354 433 continue;
bbabf03a 434 }
9db17354 435 } while (!is_done());
8a43bac5 436
bbabf03a 437 return ret;
8e86c98a 438
8a43bac5
JA
439}
440
0cc7d25e
JA
441static int read_data_net(struct thread_information *tip, void *buf,
442 unsigned int len)
8e86c98a
JA
443{
444 unsigned int bytes_left = len;
445 int ret = 0;
446
447 do {
448 ret = recv(net_in_fd, buf, bytes_left, MSG_WAITALL);
449
450 if (!ret)
451 continue;
452 else if (ret < 0) {
453 if (errno != EAGAIN) {
454 perror(tip->fn);
455 fprintf(stderr, "server: failed read\n");
456 return 0;
457 }
458 continue;
459 } else {
460 buf += ret;
461 bytes_left -= ret;
462 }
463 } while (!is_done() && bytes_left);
464
410d7c62 465 return len - bytes_left;
8e86c98a
JA
466}
467
8e86c98a
JA
468static inline struct tip_subbuf *
469subbuf_fifo_dequeue(struct thread_information *tip)
a3e4d330 470{
21f55651
JA
471 const int head = tip->fifo.head;
472 const int next = (head + 1) & (FIFO_SIZE - 1);
473
474 if (head != tip->fifo.tail) {
475 struct tip_subbuf *ts = tip->fifo.q[head];
476
477 store_barrier();
478 tip->fifo.head = next;
479 return ts;
480 }
481
482 return NULL;
9db17354 483}
eb3c8108 484
21f55651
JA
485static inline int subbuf_fifo_queue(struct thread_information *tip,
486 struct tip_subbuf *ts)
9db17354 487{
21f55651
JA
488 const int tail = tip->fifo.tail;
489 const int next = (tail + 1) & (FIFO_SIZE - 1);
490
491 if (next != tip->fifo.head) {
492 tip->fifo.q[tail] = ts;
493 store_barrier();
494 tip->fifo.tail = next;
495 return 0;
496 }
497
498 fprintf(stderr, "fifo too small!\n");
499 return 1;
a3e4d330
JA
500}
501
b7106311
JA
502/*
503 * For file output, truncate and mmap the file appropriately
504 */
8e86c98a 505static int mmap_subbuf(struct thread_information *tip, unsigned int maxlen)
b7106311
JA
506{
507 int ofd = fileno(tip->ofile);
508 int ret;
509
510 /*
511 * extend file, if we have to. use chunks of 16 subbuffers.
512 */
513 if (tip->fs_off + buf_size > tip->fs_buf_len) {
514 if (tip->fs_buf) {
5975d309 515 munlock(tip->fs_buf, tip->fs_buf_len);
b7106311
JA
516 munmap(tip->fs_buf, tip->fs_buf_len);
517 tip->fs_buf = NULL;
518 }
519
520 tip->fs_off = tip->fs_size & (page_size - 1);
521 tip->fs_buf_len = (16 * buf_size) - tip->fs_off;
522 tip->fs_max_size += tip->fs_buf_len;
523
524 if (ftruncate(ofd, tip->fs_max_size) < 0) {
525 perror("ftruncate");
526 return -1;
527 }
528
529 tip->fs_buf = mmap(NULL, tip->fs_buf_len, PROT_WRITE,
530 MAP_SHARED, ofd, tip->fs_size - tip->fs_off);
531 if (tip->fs_buf == MAP_FAILED) {
532 perror("mmap");
533 return -1;
534 }
5975d309 535 mlock(tip->fs_buf, tip->fs_buf_len);
b7106311
JA
536 }
537
7934e668 538 ret = tip->read_data(tip, tip->fs_buf + tip->fs_off, maxlen);
b7106311 539 if (ret >= 0) {
dbfbd6db 540 tip->data_read += ret;
b7106311
JA
541 tip->fs_size += ret;
542 tip->fs_off += ret;
543 return 0;
544 }
545
546 return -1;
547}
548
18eed2a7
JA
549/*
550 * Use the copy approach for pipes and network
551 */
552static int get_subbuf(struct thread_information *tip, unsigned int maxlen)
553{
554 struct tip_subbuf *ts = malloc(sizeof(*ts));
555 int ret;
556
557 ts->buf = malloc(buf_size);
558 ts->max_len = maxlen;
559
7934e668 560 ret = tip->read_data(tip, ts->buf, ts->max_len);
18eed2a7
JA
561 if (ret > 0) {
562 ts->len = ret;
dbfbd6db 563 tip->data_read += ret;
7035d92d
JA
564 if (subbuf_fifo_queue(tip, ts))
565 return -1;
18eed2a7
JA
566 }
567
568 return ret;
569}
570
32f18c48
JA
571static int get_subbuf_sendfile(struct thread_information *tip,
572 unsigned int maxlen)
573{
11eedd9b 574 struct tip_subbuf *ts;
bcbeb60f
TZ
575 struct stat sb;
576 unsigned int ready;
32f18c48 577
7934e668 578 wait_for_data(tip, POLLMSG);
18eed2a7
JA
579
580 /*
581 * hack to get last data out, we can't use sendfile for that
582 */
583 if (is_done())
584 return get_subbuf(tip, maxlen);
585
bcbeb60f
TZ
586 if (fstat(tip->fd, &sb) < 0) {
587 perror("trace stat");
588 return -1;
589 }
590 ready = sb.st_size - tip->data_queued;
7934e668
JA
591 if (!ready) {
592 usleep(1000);
bcbeb60f 593 return 0;
7934e668 594 }
bcbeb60f 595
11629347
JA
596 ts = malloc(sizeof(*ts));
597 ts->buf = NULL;
598 ts->max_len = 0;
bcbeb60f
TZ
599 ts->len = ready;
600 tip->data_queued += ready;
1be42f3d 601
11629347
JA
602 if (subbuf_fifo_queue(tip, ts))
603 return -1;
11eedd9b 604
bcbeb60f 605 return ready;
32f18c48
JA
606}
607
9db17354 608static void close_thread(struct thread_information *tip)
a3e4d330 609{
9db17354
JA
610 if (tip->fd != -1)
611 close(tip->fd);
612 if (tip->ofile)
613 fclose(tip->ofile);
614 if (tip->ofile_buffer)
615 free(tip->ofile_buffer);
616 if (tip->fd_buf)
617 free(tip->fd_buf);
1c99bc21 618
9db17354
JA
619 tip->fd = -1;
620 tip->ofile = NULL;
621 tip->ofile_buffer = NULL;
622 tip->fd_buf = NULL;
a3e4d330
JA
623}
624
8e86c98a
JA
625static void tip_ftrunc_final(struct thread_information *tip)
626{
627 /*
628 * truncate to right size and cleanup mmap
629 */
630 if (tip->ofile_mmap) {
631 int ofd = fileno(tip->ofile);
632
633 if (tip->fs_buf)
634 munmap(tip->fs_buf, tip->fs_buf_len);
635
636 ftruncate(ofd, tip->fs_size);
637 }
638}
639
9db17354 640static void *thread_main(void *arg)
a3e4d330 641{
9db17354
JA
642 struct thread_information *tip = arg;
643 pid_t pid = getpid();
644 cpu_set_t cpu_mask;
a3e4d330 645
9db17354
JA
646 CPU_ZERO(&cpu_mask);
647 CPU_SET((tip->cpu), &cpu_mask);
a3e4d330 648
9db17354
JA
649 if (sched_setaffinity(pid, sizeof(cpu_mask), &cpu_mask) == -1) {
650 perror("sched_setaffinity");
651 exit_trace(1);
652 }
a3e4d330 653
9db17354
JA
654 snprintf(tip->fn, sizeof(tip->fn), "%s/block/%s/trace%d",
655 relay_path, tip->device->buts_name, tip->cpu);
656 tip->fd = open(tip->fn, O_RDONLY);
657 if (tip->fd < 0) {
658 perror(tip->fn);
659 fprintf(stderr,"Thread %d failed open of %s\n", tip->cpu,
660 tip->fn);
661 exit_trace(1);
a3e4d330
JA
662 }
663
b7106311 664 while (!is_done()) {
7035d92d 665 if (tip->get_subbuf(tip, buf_size) < 0)
0cc7d25e 666 break;
b7106311
JA
667 }
668
7035d92d
JA
669 /*
670 * trace is stopped, pull data until we get a short read
671 */
672 while (tip->get_subbuf(tip, buf_size) > 0)
673 ;
674
8e86c98a
JA
675 tip_ftrunc_final(tip);
676 tip->exited = 1;
677 return NULL;
678}
b7106311 679
8e86c98a
JA
/*
 * Send all @buf_len bytes of @buf on socket @fd, looping over short
 * sends. Returns 0 on success, 1 if send() fails.
 */
static int write_data_net(int fd, void *buf, unsigned int buf_len)
{
	char *pos = buf;
	unsigned int remaining;
	int sent;

	for (remaining = buf_len; remaining; remaining -= sent) {
		sent = send(fd, pos, remaining, 0);
		if (sent < 0) {
			perror("send");
			return 1;
		}

		pos += sent;
	}

	return 0;
}
698
32f18c48 699static int net_send_header(struct thread_information *tip, unsigned int len)
8e86c98a
JA
700{
701 struct blktrace_net_hdr hdr;
8e86c98a
JA
702
703 hdr.magic = BLK_IO_TRACE_MAGIC;
22cd0c02 704 strcpy(hdr.buts_name, tip->device->buts_name);
8e86c98a 705 hdr.cpu = tip->cpu;
22cd0c02 706 hdr.max_cpus = ncpus;
32f18c48 707 hdr.len = len;
8e86c98a 708
32f18c48
JA
709 return write_data_net(net_out_fd, &hdr, sizeof(hdr));
710}
8e86c98a 711
6a752c90
JA
712/*
713 * send header with 0 length to signal end-of-run
714 */
715static void net_client_send_close(void)
716{
7934e668 717 struct device_information *dip;
6a752c90 718 struct blktrace_net_hdr hdr;
7934e668 719 int i;
6a752c90
JA
720
721 hdr.magic = BLK_IO_TRACE_MAGIC;
722 hdr.cpu = 0;
723 hdr.max_cpus = ncpus;
724 hdr.len = 0;
725
7934e668
JA
726 for_each_dip(dip, i) {
727 strcpy(hdr.buts_name, dip->buts_name);
728 hdr.cpu += get_dropped_count(dip->buts_name);
729 }
730
6a752c90
JA
731 write_data_net(net_out_fd, &hdr, sizeof(hdr));
732}
733
32f18c48
JA
734static int flush_subbuf_net(struct thread_information *tip,
735 struct tip_subbuf *ts)
736{
737 if (net_send_header(tip, ts->len))
7934e668 738 return -1;
22cd0c02 739 if (write_data_net(net_out_fd, ts->buf, ts->len))
7934e668 740 return -1;
8e86c98a 741
f0597a7e 742 free(ts->buf);
8e86c98a 743 free(ts);
7934e668 744 return 1;
8e86c98a
JA
745}
746
f6fead25
JA
747static int net_sendfile(struct thread_information *tip, struct tip_subbuf *ts)
748{
11629347
JA
749 int ret = sendfile(net_out_fd, tip->fd, NULL, ts->len);
750
751 if (ret < 0) {
752 perror("sendfile");
753 return 1;
754 } else if (ret < (int) ts->len) {
755 fprintf(stderr, "short sendfile send (%d of %d)\n", ret, ts->len);
756 return 1;
757 }
758
759 return 0;
760}
761
32f18c48
JA
762static int flush_subbuf_sendfile(struct thread_information *tip,
763 struct tip_subbuf *ts)
764{
7934e668 765 int ret = -1;
18eed2a7
JA
766
767 /*
768 * currently we cannot use sendfile() on the last bytes read, as they
769 * may not be a full subbuffer. get_subbuf_sendfile() falls back to
770 * the read approach for those, so use send() to ship them out
771 */
772 if (ts->buf)
773 return flush_subbuf_net(tip, ts);
11eedd9b 774
f6fead25 775 if (net_send_header(tip, ts->len))
11629347 776 goto err;
f6fead25 777 if (net_sendfile(tip, ts))
11629347 778 goto err;
32f18c48 779
f6fead25 780 tip->data_read += ts->len;
e076d33b 781 tip->ofile_offset += buf_size;
7934e668 782 ret = 1;
11629347 783err:
32f18c48 784 free(ts);
11629347 785 return ret;
32f18c48
JA
786}
787
8e86c98a
JA
788static int write_data(struct thread_information *tip, void *buf,
789 unsigned int buf_len)
8a43bac5 790{
7126171a 791 int ret;
8a43bac5 792
6480258a
JA
793 if (!buf_len)
794 return 0;
795
7126171a
JA
796 while (1) {
797 ret = fwrite(buf, buf_len, 1, tip->ofile);
007c233c 798 if (ret == 1)
8a43bac5
JA
799 break;
800
db6fe5bc
JA
801 if (ret < 0) {
802 perror("write");
803 return 1;
8a43bac5 804 }
d0ca268b
JA
805 }
806
9db17354 807 if (tip->ofile_stdout)
7126171a
JA
808 fflush(tip->ofile);
809
8a43bac5
JA
810 return 0;
811}
812
8e86c98a
JA
813static int flush_subbuf_file(struct thread_information *tip,
814 struct tip_subbuf *ts)
8a43bac5 815{
9db17354
JA
816 unsigned int offset = 0;
817 struct blk_io_trace *t;
818 int pdu_len, events = 0;
8a43bac5 819
9db17354 820 /*
7de86b12 821 * surplus from last run
9db17354 822 */
7de86b12
AB
823 if (tip->leftover_ts) {
824 struct tip_subbuf *prev_ts = tip->leftover_ts;
825
9e8b753c 826 if (prev_ts->len + ts->len > prev_ts->max_len) {
7de86b12
AB
827 prev_ts->max_len += ts->len;
828 prev_ts->buf = realloc(prev_ts->buf, prev_ts->max_len);
829 }
830
9e8b753c 831 memcpy(prev_ts->buf + prev_ts->len, ts->buf, ts->len);
7de86b12
AB
832 prev_ts->len += ts->len;
833
834 free(ts->buf);
835 free(ts);
836
837 ts = prev_ts;
838 tip->leftover_ts = NULL;
9db17354 839 }
d0ca268b 840
9db17354
JA
841 while (offset + sizeof(*t) <= ts->len) {
842 t = ts->buf + offset;
3a9d6c13 843
9cfa6c2b
AB
844 if (verify_trace(t)) {
845 write_data(tip, ts->buf, offset);
9db17354 846 return -1;
9cfa6c2b 847 }
3a9d6c13 848
9db17354 849 pdu_len = t->pdu_len;
3a9d6c13 850
9db17354 851 if (offset + sizeof(*t) + pdu_len > ts->len)
3a9d6c13 852 break;
4b5db44a 853
9db17354
JA
854 offset += sizeof(*t) + pdu_len;
855 tip->events_processed++;
b7106311 856 tip->data_read += sizeof(*t) + pdu_len;
9db17354 857 events++;
3a9d6c13
JA
858 }
859
9cfa6c2b
AB
860 if (write_data(tip, ts->buf, offset))
861 return -1;
862
3a9d6c13 863 /*
9db17354 864 * leftover bytes, save them for next time
3a9d6c13 865 */
9db17354 866 if (offset != ts->len) {
7de86b12 867 tip->leftover_ts = ts;
9e8b753c
JA
868 ts->len -= offset;
869 memmove(ts->buf, ts->buf + offset, ts->len);
7de86b12
AB
870 } else {
871 free(ts->buf);
872 free(ts);
9db17354 873 }
4b5db44a 874
9db17354 875 return events;
4b5db44a
JA
876}
877
9db17354 878static int write_tip_events(struct thread_information *tip)
d5396421 879{
21f55651 880 struct tip_subbuf *ts = subbuf_fifo_dequeue(tip);
d5396421 881
0cc7d25e
JA
882 if (ts)
883 return tip->flush_subbuf(tip, ts);
91816d54 884
9db17354 885 return 0;
91816d54
JA
886}
887
9db17354
JA
888/*
889 * scans the tips we know and writes out the subbuffers we accumulate
890 */
891static void get_and_write_events(void)
d0ca268b 892{
9db17354
JA
893 struct device_information *dip;
894 struct thread_information *tip;
27223f19 895 int i, j, events, ret, tips_running;
d0ca268b 896
9db17354
JA
897 while (!is_done()) {
898 events = 0;
d0ca268b 899
9db17354
JA
900 for_each_dip(dip, i) {
901 for_each_tip(dip, tip, j) {
902 ret = write_tip_events(tip);
903 if (ret > 0)
904 events += ret;
905 }
906 }
d0ca268b 907
9db17354 908 if (!events)
7934e668 909 usleep(100000);
d0ca268b
JA
910 }
911
a3e4d330 912 /*
9db17354 913 * reap stored events
a3e4d330 914 */
9db17354
JA
915 do {
916 events = 0;
27223f19 917 tips_running = 0;
9db17354
JA
918 for_each_dip(dip, i) {
919 for_each_tip(dip, tip, j) {
920 ret = write_tip_events(tip);
921 if (ret > 0)
922 events += ret;
27223f19 923 tips_running += !tip->exited;
9db17354 924 }
69e65a9e 925 }
9db17354 926 usleep(10);
27223f19 927 } while (events || tips_running);
d0ca268b
JA
928}
929
b7106311
JA
930static void wait_for_threads(void)
931{
932 /*
8e86c98a
JA
933 * for piped or network output, poll and fetch data for writeout.
934 * for files, we just wait around for trace threads to exit
b7106311 935 */
8e86c98a
JA
936 if ((output_name && !strcmp(output_name, "-")) ||
937 net_mode == Net_client)
b7106311
JA
938 get_and_write_events();
939 else {
940 struct device_information *dip;
941 struct thread_information *tip;
942 int i, j, tips_running;
943
944 do {
945 tips_running = 0;
7934e668 946 usleep(100000);
b7106311
JA
947
948 for_each_dip(dip, i)
949 for_each_tip(dip, tip, j)
950 tips_running += !tip->exited;
951 } while (tips_running);
952 }
6a752c90
JA
953
954 if (net_mode == Net_client)
955 net_client_send_close();
b7106311
JA
956}
957
e3bf54d8
JA
958static int fill_ofname(struct thread_information *tip, char *dst,
959 char *buts_name)
8e86c98a 960{
e3bf54d8 961 struct stat sb;
8e86c98a 962 int len = 0;
e3bf54d8 963 time_t t;
8e86c98a
JA
964
965 if (output_dir)
966 len = sprintf(dst, "%s/", output_dir);
967
e3bf54d8
JA
968 if (net_mode == Net_server) {
969 len += sprintf(dst + len, "%s-", inet_ntoa(tip->cl_in_addr));
970 time(&t);
971 len += strftime(dst + len, 64, "%F-%T/", gmtime(&t));
972 }
973
974 if (stat(dst, &sb) < 0) {
975 if (errno != ENOENT) {
976 perror("stat");
977 return 1;
978 }
979 if (mkdir(dst, 0755) < 0) {
980 perror(dst);
981 fprintf(stderr, "Can't make output dir\n");
982 return 1;
983 }
984 }
985
8e86c98a 986 if (output_name)
e3bf54d8 987 sprintf(dst + len, "%s.blktrace.%d", output_name, tip->cpu);
8e86c98a 988 else
e3bf54d8
JA
989 sprintf(dst + len, "%s.blktrace.%d", buts_name, tip->cpu);
990
991 return 0;
8e86c98a
JA
992}
993
0cc7d25e
JA
994static void fill_ops(struct thread_information *tip)
995{
996 /*
997 * setup ops
998 */
32f18c48 999 if (net_mode == Net_client) {
36808255 1000 if (net_use_sendfile) {
32f18c48
JA
1001 tip->get_subbuf = get_subbuf_sendfile;
1002 tip->flush_subbuf = flush_subbuf_sendfile;
1003 } else {
1004 tip->get_subbuf = get_subbuf;
1005 tip->flush_subbuf = flush_subbuf_net;
1006 }
1007 } else {
1008 if (tip->ofile_mmap)
1009 tip->get_subbuf = mmap_subbuf;
1010 else
1011 tip->get_subbuf = get_subbuf;
0cc7d25e 1012
0cc7d25e 1013 tip->flush_subbuf = flush_subbuf_file;
32f18c48
JA
1014 }
1015
0cc7d25e
JA
1016 if (net_mode == Net_server)
1017 tip->read_data = read_data_net;
1018 else
1019 tip->read_data = read_data_file;
1020}
1021
ddf22842
JA
1022static int tip_open_output(struct device_information *dip,
1023 struct thread_information *tip)
d0ca268b 1024{
ddf22842 1025 int pipeline = output_name && !strcmp(output_name, "-");
8e86c98a 1026 int mode, vbuf_size;
e3bf54d8 1027 char op[128];
d0ca268b 1028
ddf22842
JA
1029 if (net_mode == Net_client) {
1030 tip->ofile = NULL;
1031 tip->ofile_stdout = 0;
1032 tip->ofile_mmap = 0;
0c0b75b4 1033 goto done;
ddf22842
JA
1034 } else if (pipeline) {
1035 tip->ofile = fdopen(STDOUT_FILENO, "w");
1036 tip->ofile_stdout = 1;
1037 tip->ofile_mmap = 0;
1038 mode = _IOLBF;
1039 vbuf_size = 512;
1040 } else {
e3bf54d8
JA
1041 if (fill_ofname(tip, op, dip->buts_name))
1042 return 1;
ddf22842
JA
1043 tip->ofile = fopen(op, "w+");
1044 tip->ofile_stdout = 0;
1045 tip->ofile_mmap = 1;
1046 mode = _IOFBF;
1047 vbuf_size = OFILE_BUF;
1048 }
d5396421 1049
0c0b75b4 1050 if (tip->ofile == NULL) {
ddf22842
JA
1051 perror(op);
1052 return 1;
1053 }
d5396421 1054
0c0b75b4
JA
1055 tip->ofile_buffer = malloc(vbuf_size);
1056 if (setvbuf(tip->ofile, tip->ofile_buffer, mode, vbuf_size)) {
1057 perror("setvbuf");
1058 close_thread(tip);
1059 return 1;
ddf22842
JA
1060 }
1061
0c0b75b4 1062done:
ddf22842
JA
1063 fill_ops(tip);
1064 return 0;
1065}
007c233c 1066
ddf22842
JA
1067static int start_threads(struct device_information *dip)
1068{
1069 struct thread_information *tip;
1070 int j;
1071
1072 for_each_tip(dip, tip, j) {
1073 tip->cpu = j;
1074 tip->device = dip;
1075 tip->events_processed = 0;
11eedd9b 1076 tip->fd = -1;
ddf22842
JA
1077 memset(&tip->fifo, 0, sizeof(tip->fifo));
1078 tip->leftover_ts = NULL;
1079
1080 if (tip_open_output(dip, tip))
1081 return 1;
0cc7d25e 1082
9db17354 1083 if (pthread_create(&tip->thread, NULL, thread_main, tip)) {
e7c9f3ff 1084 perror("pthread_create");
007c233c 1085 close_thread(tip);
e7c9f3ff 1086 return 1;
d0ca268b
JA
1087 }
1088 }
1089
e7c9f3ff 1090 return 0;
d0ca268b
JA
1091}
1092
e7c9f3ff 1093static void stop_threads(struct device_information *dip)
3aabcd89 1094{
e7c9f3ff 1095 struct thread_information *tip;
91816d54 1096 unsigned long ret;
007c233c
JA
1097 int i;
1098
9db17354 1099 for_each_tip(dip, tip, i) {
91816d54 1100 (void) pthread_join(tip->thread, (void *) &ret);
9db17354
JA
1101 close_thread(tip);
1102 }
3aabcd89
JA
1103}
1104
e7c9f3ff 1105static void stop_all_threads(void)
72ca8801 1106{
e7c9f3ff 1107 struct device_information *dip;
72ca8801
NS
1108 int i;
1109
99c1f5ab 1110 for_each_dip(dip, i)
e7c9f3ff
NS
1111 stop_threads(dip);
1112}
1113
1114static void stop_all_tracing(void)
1115{
1116 struct device_information *dip;
91816d54 1117 int i;
007c233c 1118
91816d54 1119 for_each_dip(dip, i)
e7c9f3ff 1120 stop_trace(dip);
72ca8801
NS
1121}
1122
1123static void exit_trace(int status)
1124{
eb3c8108
JA
1125 if (!is_trace_stopped()) {
1126 trace_stopped = 1;
1127 stop_all_threads();
1128 stop_all_tracing();
1129 }
1130
72ca8801
NS
1131 exit(status);
1132}
1133
e7c9f3ff
NS
1134static int resize_devices(char *path)
1135{
1136 int size = (ndevs + 1) * sizeof(struct device_information);
1137
1138 device_information = realloc(device_information, size);
1139 if (!device_information) {
1140 fprintf(stderr, "Out of memory, device %s (%d)\n", path, size);
1141 return 1;
1142 }
1143 device_information[ndevs].path = path;
1144 ndevs++;
1145 return 0;
1146}
1147
1148static int open_devices(void)
d0ca268b 1149{
e7c9f3ff 1150 struct device_information *dip;
d0ca268b 1151 int i;
d0ca268b 1152
99c1f5ab 1153 for_each_dip(dip, i) {
cf9208ea 1154 dip->fd = open(dip->path, O_RDONLY | O_NONBLOCK);
e7c9f3ff
NS
1155 if (dip->fd < 0) {
1156 perror(dip->path);
1157 return 1;
1158 }
1159 }
99c1f5ab 1160
e7c9f3ff
NS
1161 return 0;
1162}
1163
1164static int start_devices(void)
1165{
1166 struct device_information *dip;
1167 int i, j, size;
1168
1169 size = ncpus * sizeof(struct thread_information);
1170 thread_information = malloc(size * ndevs);
1171 if (!thread_information) {
1172 fprintf(stderr, "Out of memory, threads (%d)\n", size * ndevs);
1173 return 1;
1174 }
d5396421 1175
99c1f5ab 1176 for_each_dip(dip, i) {
e7c9f3ff
NS
1177 if (start_trace(dip)) {
1178 close(dip->fd);
1179 fprintf(stderr, "Failed to start trace on %s\n",
1180 dip->path);
1181 break;
1182 }
1183 }
99c1f5ab 1184
e7c9f3ff 1185 if (i != ndevs) {
99c1f5ab 1186 __for_each_dip(dip, j, i)
e7c9f3ff 1187 stop_trace(dip);
99c1f5ab 1188
e7c9f3ff
NS
1189 return 1;
1190 }
1191
99c1f5ab 1192 for_each_dip(dip, i) {
e7c9f3ff
NS
1193 dip->threads = thread_information + (i * ncpus);
1194 if (start_threads(dip)) {
1195 fprintf(stderr, "Failed to start worker threads\n");
1196 break;
1197 }
1198 }
99c1f5ab 1199
e7c9f3ff 1200 if (i != ndevs) {
99c1f5ab 1201 __for_each_dip(dip, j, i)
e7c9f3ff 1202 stop_threads(dip);
99c1f5ab 1203 for_each_dip(dip, i)
e7c9f3ff 1204 stop_trace(dip);
99c1f5ab 1205
e7c9f3ff 1206 return 1;
d0ca268b
JA
1207 }
1208
e7c9f3ff 1209 return 0;
d0ca268b
JA
1210}
1211
e7c9f3ff
NS
1212static void show_stats(void)
1213{
e7c9f3ff
NS
1214 struct device_information *dip;
1215 struct thread_information *tip;
b7106311 1216 unsigned long long events_processed, data_read;
eb3c8108 1217 unsigned long total_drops;
2f903295 1218 int i, j, no_stdout = 0;
eb3c8108
JA
1219
1220 if (is_stat_shown())
1221 return;
1222
2f903295
JA
1223 if (output_name && !strcmp(output_name, "-"))
1224 no_stdout = 1;
1225
eb3c8108 1226 stat_shown = 1;
428683db 1227
56070ea4 1228 total_drops = 0;
99c1f5ab 1229 for_each_dip(dip, i) {
2f903295 1230 if (!no_stdout)
56070ea4 1231 printf("Device: %s\n", dip->path);
e7c9f3ff 1232 events_processed = 0;
b7106311 1233 data_read = 0;
99c1f5ab 1234 for_each_tip(dip, tip, j) {
2f903295 1235 if (!no_stdout)
b7106311
JA
1236 printf(" CPU%3d: %20lu events, %8llu KiB data\n",
1237 tip->cpu, tip->events_processed,
54824c20 1238 (tip->data_read + 1023) >> 10);
e7c9f3ff 1239 events_processed += tip->events_processed;
b7106311 1240 data_read += tip->data_read;
e7c9f3ff 1241 }
eb3c8108 1242 total_drops += dip->drop_count;
2f903295 1243 if (!no_stdout)
b7106311
JA
1244 printf(" Total: %20llu events (dropped %lu), %8llu KiB data\n",
1245 events_processed, dip->drop_count,
18d8437d 1246 (data_read + 1023) >> 10);
e7c9f3ff 1247 }
56070ea4
JA
1248
1249 if (total_drops)
1250 fprintf(stderr, "You have dropped events, consider using a larger buffer size (-b)\n");
e7c9f3ff 1251}
52724a0e 1252
e3bf54d8
JA
1253static struct device_information *net_get_dip(char *buts_name,
1254 struct in_addr *cl_in_addr)
8e86c98a 1255{
22cd0c02 1256 struct device_information *dip;
8e86c98a
JA
1257 int i;
1258
22cd0c02
JA
1259 for (i = 0; i < ndevs; i++) {
1260 dip = &device_information[i];
8e86c98a 1261
22cd0c02
JA
1262 if (!strcmp(dip->buts_name, buts_name))
1263 return dip;
8e86c98a
JA
1264 }
1265
22cd0c02
JA
1266 device_information = realloc(device_information, (ndevs + 1) * sizeof(*dip));
1267 dip = &device_information[ndevs];
921b05fe
JA
1268 memset(dip, 0, sizeof(*dip));
1269 dip->fd = -1;
22cd0c02 1270 strcpy(dip->buts_name, buts_name);
921b05fe 1271 dip->path = strdup(buts_name);
22cd0c02
JA
1272 ndevs++;
1273 dip->threads = malloc(ncpus * sizeof(struct thread_information));
1274 memset(dip->threads, 0, ncpus * sizeof(struct thread_information));
1275
1276 /*
1277 * open all files
1278 */
1279 for (i = 0; i < ncpus; i++) {
1280 struct thread_information *tip = &dip->threads[i];
8e86c98a 1281
22cd0c02 1282 tip->cpu = i;
22cd0c02 1283 tip->device = dip;
1366e53a 1284 tip->fd = -1;
e3bf54d8 1285 tip->cl_in_addr = *cl_in_addr;
8e86c98a 1286
ddf22842 1287 if (tip_open_output(dip, tip))
22cd0c02 1288 return NULL;
8e86c98a
JA
1289 }
1290
22cd0c02
JA
1291 return dip;
1292}
1293
e3bf54d8
JA
1294static struct thread_information *net_get_tip(struct blktrace_net_hdr *bnh,
1295 struct in_addr *cl_in_addr)
22cd0c02
JA
1296{
1297 struct device_information *dip;
1298
1299 ncpus = bnh->max_cpus;
e3bf54d8 1300 dip = net_get_dip(bnh->buts_name, cl_in_addr);
22cd0c02 1301 return &dip->threads[bnh->cpu];
8e86c98a
JA
1302}
1303
1304static int net_get_header(struct blktrace_net_hdr *bnh)
1305{
1306 int fl = fcntl(net_in_fd, F_GETFL);
1307 int bytes_left, ret;
1308 void *p = bnh;
1309
1310 fcntl(net_in_fd, F_SETFL, fl | O_NONBLOCK);
1311 bytes_left = sizeof(*bnh);
1312 while (bytes_left && !is_done()) {
1313 ret = recv(net_in_fd, p, bytes_left, MSG_WAITALL);
1314 if (ret < 0) {
1315 if (errno != EAGAIN) {
1316 perror("recv header");
1317 return 1;
1318 }
7934e668 1319 usleep(1000);
8e86c98a
JA
1320 continue;
1321 } else if (!ret) {
7934e668 1322 usleep(1000);
8e86c98a
JA
1323 continue;
1324 } else {
1325 p += ret;
1326 bytes_left -= ret;
1327 }
1328 }
1329 fcntl(net_in_fd, F_SETFL, fl & ~O_NONBLOCK);
227f89ff 1330 return bytes_left;
8e86c98a
JA
1331}
1332
e3bf54d8 1333static int net_server_loop(struct in_addr *cl_in_addr)
8e86c98a
JA
1334{
1335 struct thread_information *tip;
1336 struct blktrace_net_hdr bnh;
1337
1338 if (net_get_header(&bnh))
1339 return 1;
1340
1341 if (data_is_native == -1 && check_data_endianness(bnh.magic)) {
1342 fprintf(stderr, "server: received data is bad\n");
1343 return 1;
1344 }
1345
1346 if (!data_is_native) {
227f89ff 1347 bnh.magic = be32_to_cpu(bnh.magic);
8e86c98a
JA
1348 bnh.cpu = be32_to_cpu(bnh.cpu);
1349 bnh.len = be32_to_cpu(bnh.len);
1350 }
1351
227f89ff
JA
1352 if ((bnh.magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) {
1353 fprintf(stderr, "server: bad data magic\n");
1354 return 1;
1355 }
1356
6a752c90
JA
1357 /*
1358 * len == 0 means that the other end signalled end-of-run
1359 */
1360 if (!bnh.len) {
7934e668
JA
1361 /*
1362 * overload cpu count with dropped events
1363 */
1364 struct device_information *dip;
1365
1366 dip = net_get_dip(bnh.buts_name, cl_in_addr);
1367 dip->drop_count = bnh.cpu;
1368
6a752c90
JA
1369 fprintf(stderr, "server: end of run\n");
1370 return 1;
1371 }
1372
e3bf54d8 1373 tip = net_get_tip(&bnh, cl_in_addr);
8e86c98a
JA
1374 if (!tip)
1375 return 1;
1376
1377 if (mmap_subbuf(tip, bnh.len))
1378 return 1;
1379
1380 return 0;
1381}
1382
659bcc3f
JA
1383static int get_connection(int fd, struct sockaddr_in *addr)
1384{
1385 struct pollfd pfd = { .fd = fd, .events = POLLIN };
1386 socklen_t socklen;
1387
1388 printf("blktrace: waiting for incoming connection...\n");
1389
1390 if (poll(&pfd, 1, -1) < 0) {
1391 perror("poll for connection");
1392 return 1;
1393 }
1394 if ((pfd.revents & POLLIN) == 0)
1395 return 1;
1396
1397 socklen = sizeof(*addr);
1398 net_in_fd = accept(fd, (struct sockaddr *) addr, &socklen);
1399 if (net_in_fd < 0) {
1400 perror("accept");
1401 return 1;
1402 }
1403
1404 printf("blktrace: connection from %s\n", inet_ntoa(addr->sin_addr));
1405 return 0;
1406}
1407
8e86c98a
JA
1408/*
1409 * Start here when we are in server mode - just fetch data from the network
1410 * and dump to files
1411 */
1412static int net_server(void)
1413{
898bbd3b
JA
1414 struct device_information *dip;
1415 struct thread_information *tip;
8e86c98a 1416 struct sockaddr_in addr;
22cd0c02 1417 int fd, opt, i, j;
8e86c98a
JA
1418
1419 fd = socket(AF_INET, SOCK_STREAM, 0);
1420 if (fd < 0) {
1421 perror("server: socket");
1422 return 1;
1423 }
1424
1425 opt = 1;
1426 if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) < 0) {
1427 perror("setsockopt");
1428 return 1;
1429 }
1430
1431 memset(&addr, 0, sizeof(addr));
1432 addr.sin_family = AF_INET;
1433 addr.sin_addr.s_addr = htonl(INADDR_ANY);
1434 addr.sin_port = htons(net_port);
1435
1436 if (bind(fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
1437 perror("bind");
1438 return 1;
1439 }
1440
1441 if (listen(fd, 1) < 0) {
1442 perror("listen");
1443 return 1;
1444 }
1445
6a752c90 1446repeat:
659bcc3f
JA
1447 if (get_connection(fd, &addr))
1448 return 0;
8e86c98a
JA
1449
1450 while (!is_done()) {
e3bf54d8 1451 if (net_server_loop(&addr.sin_addr))
8e86c98a
JA
1452 break;
1453 }
1454
898bbd3b
JA
1455 for_each_dip(dip, i)
1456 for_each_tip(dip, tip, j)
1457 tip_ftrunc_final(tip);
8e86c98a 1458
410d7c62 1459 show_stats();
6a752c90
JA
1460
1461 if (is_done())
1462 return 0;
1463
898bbd3b
JA
1464 /*
1465 * cleanup for next run
1466 */
1467 for_each_dip(dip, i) {
1468 for_each_tip(dip, tip, j)
1469 fclose(tip->ofile);
1470
1471 free(dip->threads);
921b05fe 1472 free(dip->path);
898bbd3b
JA
1473 }
1474
1475 free(device_information);
1476 device_information = NULL;
1477 ncpus = ndevs = 0;
b46a0342
JA
1478
1479 close(net_in_fd);
1480 net_in_fd = -1;
dbfbd6db 1481 stat_shown = 0;
6a752c90 1482 goto repeat;
8e86c98a
JA
1483}
1484
1485/*
1486 * Setup outgoing network connection where we will transmit data
1487 */
1488static int net_setup_client(void)
1489{
1490 struct sockaddr_in addr;
1491 int fd;
1492
1493 fd = socket(AF_INET, SOCK_STREAM, 0);
1494 if (fd < 0) {
1495 perror("client: socket");
1496 return 1;
1497 }
1498
1499 memset(&addr, 0, sizeof(addr));
1500 addr.sin_family = AF_INET;
1501 addr.sin_port = htons(net_port);
1502
1503 if (inet_aton(hostname, &addr.sin_addr) != 1) {
1504 struct hostent *hent = gethostbyname(hostname);
1505 if (!hent) {
1506 perror("gethostbyname");
1507 return 1;
1508 }
1509
1510 memcpy(&addr.sin_addr, hent->h_addr, 4);
1511 strcpy(hostname, hent->h_name);
1512 }
1513
1514 printf("blktrace: connecting to %s\n", hostname);
1515
1516 if (connect(fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
1517 perror("client: connect");
1518 return 1;
1519 }
1520
1521 printf("blktrace: connected!\n");
1522 net_out_fd = fd;
1523 return 0;
1524}
1525
52724a0e
JA
/*
 * Option summary printed by show_usage(). Adjacent string literals are
 * concatenated by the compiler, so no line continuations are needed.
 */
static char usage_str[] =
	"-d <dev> [ -r relay path ] [ -o <output> ] [-k ] [ -w time ]\n"
	"[ -a action ] [ -A action mask ] [ -v ]\n\n"
	"\t-d Use specified device. May also be given last after options\n"
	"\t-r Path to mounted relayfs, defaults to /relay\n"
	"\t-o File(s) to send output to\n"
	"\t-D Directory to prepend to output file names\n"
	"\t-k Kill a running trace\n"
	"\t-w Stop after defined time, in seconds\n"
	"\t-a Only trace specified actions. See documentation\n"
	"\t-A Give trace mask as a single value. See documentation\n"
	"\t-b Sub buffer size in KiB\n"
	"\t-n Number of sub buffers\n"
	"\t-l Run in network listen mode (blktrace server)\n"
	"\t-h Run in network client mode, connecting to the given host\n"
	"\t-p Network port to use (default 8462)\n"
	"\t-s Make the network client use sendfile() to transfer data\n"
	"\t-V Print program version info\n\n";
52724a0e 1544
ee1f4158
NS
1545static void show_usage(char *program)
1546{
52724a0e 1547 fprintf(stderr, "Usage: %s %s %s",program, blktrace_version, usage_str);
ee1f4158 1548}
d0ca268b
JA
1549
1550int main(int argc, char *argv[])
1551{
5270dddd 1552 static char default_relay_path[] = "/relay";
e3e74029 1553 struct statfs st;
d39c04ca 1554 int i, c;
ece238a6 1555 int stop_watch = 0;
d39c04ca
AB
1556 int act_mask_tmp = 0;
1557
1558 while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) >= 0) {
1559 switch (c) {
1560 case 'a':
1561 i = find_mask_map(optarg);
1562 if (i < 0) {
ab197ca7 1563 fprintf(stderr,"Invalid action mask %s\n",
d39c04ca 1564 optarg);
7425d456 1565 return 1;
d39c04ca
AB
1566 }
1567 act_mask_tmp |= i;
1568 break;
1569
1570 case 'A':
98f8386b
AB
1571 if ((sscanf(optarg, "%x", &i) != 1) ||
1572 !valid_act_opt(i)) {
d39c04ca 1573 fprintf(stderr,
ab197ca7 1574 "Invalid set action mask %s/0x%x\n",
d39c04ca 1575 optarg, i);
7425d456 1576 return 1;
d39c04ca
AB
1577 }
1578 act_mask_tmp = i;
1579 break;
d0ca268b 1580
d39c04ca 1581 case 'd':
e7c9f3ff
NS
1582 if (resize_devices(optarg) != 0)
1583 return 1;
d39c04ca
AB
1584 break;
1585
5270dddd
JA
1586 case 'r':
1587 relay_path = optarg;
1588 break;
1589
d5396421 1590 case 'o':
66efebf8 1591 output_name = optarg;
d5396421 1592 break;
bc39777c
JA
1593 case 'k':
1594 kill_running_trace = 1;
1595 break;
ece238a6
NS
1596 case 'w':
1597 stop_watch = atoi(optarg);
1598 if (stop_watch <= 0) {
1599 fprintf(stderr,
1600 "Invalid stopwatch value (%d secs)\n",
1601 stop_watch);
1602 return 1;
1603 }
1604 break;
57ea8602 1605 case 'V':
52724a0e
JA
1606 printf("%s version %s\n", argv[0], blktrace_version);
1607 return 0;
129aa440 1608 case 'b':
eb3c8108 1609 buf_size = strtoul(optarg, NULL, 10);
183a0855 1610 if (buf_size <= 0 || buf_size > 16*1024) {
129aa440 1611 fprintf(stderr,
eb3c8108 1612 "Invalid buffer size (%lu)\n",buf_size);
129aa440
JA
1613 return 1;
1614 }
1615 buf_size <<= 10;
1616 break;
1617 case 'n':
eb3c8108 1618 buf_nr = strtoul(optarg, NULL, 10);
129aa440
JA
1619 if (buf_nr <= 0) {
1620 fprintf(stderr,
eb3c8108 1621 "Invalid buffer nr (%lu)\n", buf_nr);
129aa440
JA
1622 return 1;
1623 }
1624 break;
d1d7f15f
JA
1625 case 'D':
1626 output_dir = optarg;
1627 break;
8e86c98a
JA
1628 case 'h':
1629 net_mode = Net_client;
1630 strcpy(hostname, optarg);
1631 break;
1632 case 'l':
1633 net_mode = Net_server;
1634 break;
1635 case 'p':
1636 net_port = atoi(optarg);
1637 break;
32f18c48 1638 case 's':
f6fead25 1639 net_use_sendfile = 1;
32f18c48 1640 break;
d39c04ca 1641 default:
ee1f4158 1642 show_usage(argv[0]);
7425d456 1643 return 1;
d39c04ca
AB
1644 }
1645 }
1646
8e86c98a
JA
1647 setlocale(LC_NUMERIC, "en_US");
1648
1649 page_size = getpagesize();
1650
1651 if (net_mode == Net_server)
1652 return net_server();
1653
22cd0c02
JA
1654 while (optind < argc) {
1655 if (resize_devices(argv[optind++]) != 0)
1656 return 1;
1657 }
1658
e7c9f3ff 1659 if (ndevs == 0) {
ee1f4158 1660 show_usage(argv[0]);
7425d456 1661 return 1;
d39c04ca
AB
1662 }
1663
5270dddd
JA
1664 if (!relay_path)
1665 relay_path = default_relay_path;
1666
d5396421 1667 if (act_mask_tmp != 0)
d39c04ca 1668 act_mask = act_mask_tmp;
d0ca268b 1669
e3e74029
NS
1670 if (statfs(relay_path, &st) < 0) {
1671 perror("statfs");
1672 fprintf(stderr,"%s does not appear to be a valid path\n",
1673 relay_path);
1674 return 1;
64acacae 1675 } else if (st.f_type != (long) RELAYFS_TYPE) {
e3e74029 1676 fprintf(stderr,"%s does not appear to be a relay filesystem\n",
d0ca268b 1677 relay_path);
7425d456 1678 return 1;
d0ca268b
JA
1679 }
1680
e7c9f3ff 1681 if (open_devices() != 0)
7425d456 1682 return 1;
bc39777c
JA
1683
1684 if (kill_running_trace) {
e7c9f3ff 1685 stop_all_traces();
7425d456 1686 return 0;
bc39777c
JA
1687 }
1688
e7c9f3ff
NS
1689 ncpus = sysconf(_SC_NPROCESSORS_ONLN);
1690 if (ncpus < 0) {
1691 fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed\n");
7425d456 1692 return 1;
d0ca268b
JA
1693 }
1694
d0ca268b
JA
1695 signal(SIGINT, handle_sigint);
1696 signal(SIGHUP, handle_sigint);
1697 signal(SIGTERM, handle_sigint);
ece238a6 1698 signal(SIGALRM, handle_sigint);
d0ca268b 1699
8e86c98a
JA
1700 if (net_mode == Net_client && net_setup_client())
1701 return 1;
1702
1703 if (start_devices() != 0)
1704 return 1;
1705
e7c9f3ff 1706 atexit(stop_all_tracing);
830fd65c 1707
ece238a6
NS
1708 if (stop_watch)
1709 alarm(stop_watch);
1710
b7106311 1711 wait_for_threads();
d0ca268b 1712
eb3c8108
JA
1713 if (!is_trace_stopped()) {
1714 trace_stopped = 1;
91816d54
JA
1715 stop_all_threads();
1716 stop_all_traces();
91816d54 1717 }
d0ca268b 1718
eb3c8108
JA
1719 show_stats();
1720
d0ca268b
JA
1721 return 0;
1722}
1723