[PATCH] blktrace: only do ioctl stop on local trace
[blktrace.git] / blktrace.c
CommitLineData
d0ca268b
JA
1/*
2 * block queue tracing application
3 *
d956a2cd
JA
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *
d0ca268b
JA
20 */
21#include <pthread.h>
22#include <sys/types.h>
23#include <sys/stat.h>
24#include <unistd.h>
25#include <locale.h>
26#include <signal.h>
27#include <fcntl.h>
28#include <string.h>
29#include <sys/ioctl.h>
b9d4294e 30#include <sys/param.h>
e3e74029 31#include <sys/statfs.h>
eb3c8108 32#include <sys/poll.h>
b7106311 33#include <sys/mman.h>
8e86c98a 34#include <sys/socket.h>
d0ca268b
JA
35#include <stdio.h>
36#include <stdlib.h>
37#include <sched.h>
d39c04ca
AB
38#include <ctype.h>
39#include <getopt.h>
da39451f 40#include <errno.h>
8e86c98a
JA
41#include <netinet/in.h>
42#include <arpa/inet.h>
43#include <netdb.h>
32f18c48 44#include <sys/sendfile.h>
d0ca268b
JA
45
46#include "blktrace.h"
21f55651 47#include "barrier.h"
d0ca268b 48
/* Program version string. */
static char blktrace_version[] = "0.99";

/*
 * Default relay sub-buffer size and count requested from the kernel at
 * trace setup. You may want to increase these even more, if you are
 * logging at a high rate and see skipped/missed events.
 */
#define BUF_SIZE	(512 * 1024)
#define BUF_NR		(4)

/* Size of the stdio buffer attached to a regular per-CPU output file. */
#define OFILE_BUF	(128 * 1024)

/*
 * NOTE(review): presumably the statfs f_type magic of relayfs; its use is
 * not visible in this part of the file.
 */
#define RELAYFS_TYPE	0xF0B4A981
61
/*
 * Short option string; every letter matches the val of one long option
 * below and a trailing ':' marks an option that takes an argument.
 * NOTE(review): presumably handed to getopt_long() together with l_opts,
 * the call itself is outside this part of the file.
 */
#define S_OPTS	"d:a:A:r:o:kw:Vb:n:D:lh:p:s"

/*
 * Long option table, fields are { name, has_arg, flag, val }. flag is NULL
 * everywhere, the table ends with an all-zero entry.
 */
static struct option l_opts[] = {
	{ "dev",		required_argument,	NULL,	'd' },
	{ "act-mask",		required_argument,	NULL,	'a' },
	{ "set-mask",		required_argument,	NULL,	'A' },
	{ "relay",		required_argument,	NULL,	'r' },
	{ "output",		required_argument,	NULL,	'o' },
	{ "kill",		no_argument,		NULL,	'k' },
	{ "stopwatch",		required_argument,	NULL,	'w' },
	{ "version",		no_argument,		NULL,	'V' },
	{ "buffer-size",	required_argument,	NULL,	'b' },
	{ "num-sub-buffers",	required_argument,	NULL,	'n' },
	{ "output-dir",		required_argument,	NULL,	'D' },
	{ "listen",		no_argument,		NULL,	'l' },
	{ "host",		required_argument,	NULL,	'h' },
	{ "port",		required_argument,	NULL,	'p' },
	{ "sendfile",		no_argument,		NULL,	's' },
	{ NULL,			0,			NULL,	0 }
};
158
/*
 * One chunk of trace data handed from a reader thread to the flush code.
 * It either owns a copy of the data in buf, or (sendfile mode) has
 * buf == NULL and only describes a region of the relay file.
 */
struct tip_subbuf {
	void *buf;		/* malloc'ed data, NULL for sendfile chunks */
	unsigned int len;	/* number of valid bytes / bytes to send */
	unsigned int max_len;	/* capacity the flush code assumes for buf */
	off_t offset;		/* relay file offset, used by sendfile mode */
};
165
21f55651
JA
#define FIFO_SIZE	(1024)	/* should be plenty big! */
#define CL_SIZE		(128)	/* cache line, any bigger? */

/*
 * Ring of sub-buffer pointers. The queue side advances tail, the dequeue
 * side advances head; the indices are masked with FIFO_SIZE - 1, so
 * FIFO_SIZE has to stay a power of two. Both indices are aligned to
 * CL_SIZE so they end up on separate cache lines.
 */
struct tip_subbuf_fifo {
	int tail __attribute__((aligned(CL_SIZE)));
	int head __attribute__((aligned(CL_SIZE)));
	struct tip_subbuf *q[FIFO_SIZE];
};
174
d0ca268b
JA
175struct thread_information {
176 int cpu;
177 pthread_t thread;
b9d4294e
JA
178
179 int fd;
a3e4d330 180 void *fd_buf;
b9d4294e
JA
181 char fn[MAXPATHLEN + 64];
182
11eedd9b
JA
183 int pfd;
184 size_t *pfd_buf;
185
e3bf54d8
JA
186 struct in_addr cl_in_addr;
187
007c233c
JA
188 FILE *ofile;
189 char *ofile_buffer;
32f18c48 190 off_t ofile_offset;
9db17354 191 int ofile_stdout;
8e86c98a 192 int ofile_mmap;
007c233c 193
0cc7d25e
JA
194 int (*get_subbuf)(struct thread_information *, unsigned int);
195 int (*flush_subbuf)(struct thread_information *, struct tip_subbuf *);
196 int (*read_data)(struct thread_information *, void *, unsigned int);
197
d0ca268b 198 unsigned long events_processed;
b7106311 199 unsigned long long data_read;
e7c9f3ff 200 struct device_information *device;
9db17354
JA
201
202 int exited;
203
b7106311
JA
204 /*
205 * piped fifo buffers
206 */
21f55651 207 struct tip_subbuf_fifo fifo;
7de86b12 208 struct tip_subbuf *leftover_ts;
b7106311
JA
209
210 /*
211 * mmap controlled output files
212 */
213 unsigned long long fs_size;
214 unsigned long long fs_max_size;
215 unsigned long fs_off;
216 void *fs_buf;
217 unsigned long fs_buf_len;
d0ca268b
JA
218};
219
e7c9f3ff
NS
/*
 * State kept for every traced block device.
 */
struct device_information {
	int fd;				/* device node, -1 when not open */
	char *path;			/* device path as given by the user */
	char buts_name[32];		/* trace name returned by BLKTRACESETUP */
	volatile int trace_started;	/* see dip_tracing()/dip_set_tracing() */
	unsigned long drop_count;	/* value read from the "dropped" file */
	struct thread_information *threads;	/* ncpus per-CPU readers */
};
d0ca268b 228
e7c9f3ff 229static int ncpus;
d0ca268b 230static struct thread_information *thread_information;
e7c9f3ff
NS
231static int ndevs;
232static struct device_information *device_information;
233
234/* command line option globals */
235static char *relay_path;
d5396421 236static char *output_name;
d1d7f15f 237static char *output_dir;
5c86134e 238static int act_mask = ~0U;
bc39777c 239static int kill_running_trace;
eb3c8108
JA
240static unsigned long buf_size = BUF_SIZE;
241static unsigned long buf_nr = BUF_NR;
b7106311 242static unsigned int page_size;
d39c04ca 243
e7c9f3ff
NS
244#define is_done() (*(volatile int *)(&done))
245static volatile int done;
246
eb3c8108
JA
247#define is_trace_stopped() (*(volatile int *)(&trace_stopped))
248static volatile int trace_stopped;
249
250#define is_stat_shown() (*(volatile int *)(&stat_shown))
251static volatile int stat_shown;
a3e4d330 252
8e86c98a
JA
253int data_is_native = -1;
254
72ca8801
NS
255static void exit_trace(int status);
256
99c1f5ab
JA
257#define dip_tracing(dip) (*(volatile int *)(&(dip)->trace_started))
258#define dip_set_tracing(dip, v) ((dip)->trace_started = (v))
259
260#define __for_each_dip(__d, __i, __e) \
261 for (__i = 0, __d = device_information; __i < __e; __i++, __d++)
262
263#define for_each_dip(__d, __i) __for_each_dip(__d, __i, ndevs)
9db17354
JA
264#define for_each_tip(__d, __t, __j) \
265 for (__j = 0, __t = (__d)->threads; __j < ncpus; __j++, __t++)
99c1f5ab 266
8e86c98a
JA
267/*
268 * networking stuff follows. we include a magic number so we know whether
269 * to endianness convert or not
270 */
271struct blktrace_net_hdr {
272 u32 magic; /* same as trace magic */
22cd0c02 273 char buts_name[32]; /* trace name */
8e86c98a 274 u32 cpu; /* for which cpu */
22cd0c02 275 u32 max_cpus;
8e86c98a
JA
276 u32 len; /* length of following trace data */
277};
278
279#define TRACE_NET_PORT (8462)
280
281enum {
282 Net_none = 0,
283 Net_server,
284 Net_client,
285};
286
287/*
288 * network cmd line params
289 */
290static char hostname[MAXHOSTNAMELEN];
291static int net_port = TRACE_NET_PORT;
292static int net_mode = 0;
32f18c48 293static int net_sendfile;
8e86c98a
JA
294
295static int net_in_fd = -1;
296static int net_out_fd = -1;
297
298static void handle_sigint(__attribute__((__unused__)) int sig)
299{
7035d92d
JA
300 struct device_information *dip;
301 int i;
302
303 /*
304 * stop trace so we can reap currently produced data
305 */
306 for_each_dip(dip, i) {
921b05fe
JA
307 if (dip->fd == -1)
308 continue;
7035d92d
JA
309 if (ioctl(dip->fd, BLKTRACESTOP) < 0)
310 perror("BLKTRACESTOP");
311 }
312
8e86c98a
JA
313 done = 1;
314}
315
eb3c8108
JA
316static int get_dropped_count(const char *buts_name)
317{
318 int fd;
319 char tmp[MAXPATHLEN + 64];
320
321 snprintf(tmp, sizeof(tmp), "%s/block/%s/dropped",
322 relay_path, buts_name);
323
324 fd = open(tmp, O_RDONLY);
325 if (fd < 0) {
326 /*
327 * this may be ok, if the kernel doesn't support dropped counts
328 */
329 if (errno == ENOENT)
330 return 0;
331
332 fprintf(stderr, "Couldn't open dropped file %s\n", tmp);
333 return -1;
334 }
335
336 if (read(fd, tmp, sizeof(tmp)) < 0) {
337 perror(tmp);
338 close(fd);
339 return -1;
340 }
341
342 close(fd);
343
344 return atoi(tmp);
345}
346
11eedd9b
JA
347static size_t get_subbuf_padding(struct thread_information *tip,
348 unsigned subbuf)
349{
350 size_t padding_size = buf_nr * sizeof(size_t);
351 size_t ret;
352
353 if (read(tip->pfd, tip->pfd_buf, padding_size) < 0) {
354 perror("tip pad read");
355 ret = -1;
356 } else
357 ret = tip->pfd_buf[subbuf];
358
359 return ret;
360}
361
e7c9f3ff 362static int start_trace(struct device_information *dip)
d0ca268b
JA
363{
364 struct blk_user_trace_setup buts;
365
1f79c4a0 366 memset(&buts, 0, sizeof(buts));
129aa440
JA
367 buts.buf_size = buf_size;
368 buts.buf_nr = buf_nr;
d39c04ca 369 buts.act_mask = act_mask;
d0ca268b 370
ed71a31e
JA
371 if (ioctl(dip->fd, BLKTRACESETUP, &buts) < 0) {
372 perror("BLKTRACESETUP");
373 return 1;
374 }
375
376 if (ioctl(dip->fd, BLKTRACESTART) < 0) {
377 perror("BLKTRACESTART");
d0ca268b
JA
378 return 1;
379 }
380
e7c9f3ff 381 memcpy(dip->buts_name, buts.name, sizeof(dip->buts_name));
99c1f5ab 382 dip_set_tracing(dip, 1);
d0ca268b
JA
383 return 0;
384}
385
e7c9f3ff 386static void stop_trace(struct device_information *dip)
d0ca268b 387{
99c1f5ab
JA
388 if (dip_tracing(dip) || kill_running_trace) {
389 dip_set_tracing(dip, 0);
cf9208ea 390
7035d92d
JA
391 /*
392 * should be stopped, just don't complain if it isn't
393 */
394 ioctl(dip->fd, BLKTRACESTOP);
395
ed71a31e
JA
396 if (ioctl(dip->fd, BLKTRACETEARDOWN) < 0)
397 perror("BLKTRACETEARDOWN");
cf9208ea 398
e7c9f3ff 399 close(dip->fd);
cf9208ea 400 dip->fd = -1;
707b0914 401 }
d0ca268b
JA
402}
403
e7c9f3ff
NS
404static void stop_all_traces(void)
405{
406 struct device_information *dip;
407 int i;
408
eb3c8108
JA
409 for_each_dip(dip, i) {
410 dip->drop_count = get_dropped_count(dip->buts_name);
e7c9f3ff 411 stop_trace(dip);
eb3c8108 412 }
e7c9f3ff
NS
413}
414
eb3c8108
JA
415static void wait_for_data(struct thread_information *tip)
416{
417 struct pollfd pfd = { .fd = tip->fd, .events = POLLIN };
418
9db17354 419 do {
b4aabcb3 420 poll(&pfd, 1, 100);
9db17354
JA
421 if (pfd.revents & POLLIN)
422 break;
423 if (tip->ofile_stdout)
424 break;
425 } while (!is_done());
eb3c8108
JA
426}
427
0cc7d25e
JA
428static int read_data_file(struct thread_information *tip, void *buf,
429 unsigned int len)
d0ca268b 430{
ae9f71b3 431 int ret = 0;
bbabf03a 432
9db17354
JA
433 do {
434 wait_for_data(tip);
ae9f71b3 435
9db17354
JA
436 ret = read(tip->fd, buf, len);
437 if (!ret)
438 continue;
439 else if (ret > 0)
440 return ret;
441 else {
bbabf03a 442 if (errno != EAGAIN) {
a3e4d330
JA
443 perror(tip->fn);
444 fprintf(stderr,"Thread %d failed read of %s\n",
445 tip->cpu, tip->fn);
446 break;
447 }
9db17354 448 continue;
bbabf03a 449 }
9db17354 450 } while (!is_done());
8a43bac5 451
bbabf03a 452 return ret;
8e86c98a 453
8a43bac5
JA
454}
455
0cc7d25e
JA
456static int read_data_net(struct thread_information *tip, void *buf,
457 unsigned int len)
8e86c98a
JA
458{
459 unsigned int bytes_left = len;
460 int ret = 0;
461
462 do {
463 ret = recv(net_in_fd, buf, bytes_left, MSG_WAITALL);
464
465 if (!ret)
466 continue;
467 else if (ret < 0) {
468 if (errno != EAGAIN) {
469 perror(tip->fn);
470 fprintf(stderr, "server: failed read\n");
471 return 0;
472 }
473 continue;
474 } else {
475 buf += ret;
476 bytes_left -= ret;
477 }
478 } while (!is_done() && bytes_left);
479
410d7c62 480 return len - bytes_left;
8e86c98a
JA
481}
482
0cc7d25e
JA
483static int read_data(struct thread_information *tip, void *buf,
484 unsigned int len)
8e86c98a 485{
663962f7 486 return tip->read_data(tip, buf, len);
8e86c98a
JA
487}
488
489static inline struct tip_subbuf *
490subbuf_fifo_dequeue(struct thread_information *tip)
a3e4d330 491{
21f55651
JA
492 const int head = tip->fifo.head;
493 const int next = (head + 1) & (FIFO_SIZE - 1);
494
495 if (head != tip->fifo.tail) {
496 struct tip_subbuf *ts = tip->fifo.q[head];
497
498 store_barrier();
499 tip->fifo.head = next;
500 return ts;
501 }
502
503 return NULL;
9db17354 504}
eb3c8108 505
21f55651
JA
506static inline int subbuf_fifo_queue(struct thread_information *tip,
507 struct tip_subbuf *ts)
9db17354 508{
21f55651
JA
509 const int tail = tip->fifo.tail;
510 const int next = (tail + 1) & (FIFO_SIZE - 1);
511
512 if (next != tip->fifo.head) {
513 tip->fifo.q[tail] = ts;
514 store_barrier();
515 tip->fifo.tail = next;
516 return 0;
517 }
518
519 fprintf(stderr, "fifo too small!\n");
520 return 1;
a3e4d330
JA
521}
522
b7106311
JA
523/*
524 * For file output, truncate and mmap the file appropriately
525 */
8e86c98a 526static int mmap_subbuf(struct thread_information *tip, unsigned int maxlen)
b7106311
JA
527{
528 int ofd = fileno(tip->ofile);
529 int ret;
530
531 /*
532 * extend file, if we have to. use chunks of 16 subbuffers.
533 */
534 if (tip->fs_off + buf_size > tip->fs_buf_len) {
535 if (tip->fs_buf) {
5975d309 536 munlock(tip->fs_buf, tip->fs_buf_len);
b7106311
JA
537 munmap(tip->fs_buf, tip->fs_buf_len);
538 tip->fs_buf = NULL;
539 }
540
541 tip->fs_off = tip->fs_size & (page_size - 1);
542 tip->fs_buf_len = (16 * buf_size) - tip->fs_off;
543 tip->fs_max_size += tip->fs_buf_len;
544
545 if (ftruncate(ofd, tip->fs_max_size) < 0) {
546 perror("ftruncate");
547 return -1;
548 }
549
550 tip->fs_buf = mmap(NULL, tip->fs_buf_len, PROT_WRITE,
551 MAP_SHARED, ofd, tip->fs_size - tip->fs_off);
552 if (tip->fs_buf == MAP_FAILED) {
553 perror("mmap");
554 return -1;
555 }
5975d309 556 mlock(tip->fs_buf, tip->fs_buf_len);
b7106311
JA
557 }
558
8e86c98a 559 ret = read_data(tip, tip->fs_buf + tip->fs_off, maxlen);
b7106311 560 if (ret >= 0) {
dbfbd6db 561 tip->data_read += ret;
b7106311
JA
562 tip->fs_size += ret;
563 tip->fs_off += ret;
564 return 0;
565 }
566
567 return -1;
568}
569
18eed2a7
JA
570/*
571 * Use the copy approach for pipes and network
572 */
573static int get_subbuf(struct thread_information *tip, unsigned int maxlen)
574{
575 struct tip_subbuf *ts = malloc(sizeof(*ts));
576 int ret;
577
578 ts->buf = malloc(buf_size);
579 ts->max_len = maxlen;
580
581 ret = read_data(tip, ts->buf, ts->max_len);
582 if (ret > 0) {
583 ts->len = ret;
dbfbd6db 584 tip->data_read += ret;
7035d92d
JA
585 if (subbuf_fifo_queue(tip, ts))
586 return -1;
18eed2a7
JA
587 }
588
589 return ret;
590}
591
32f18c48
JA
592static int get_subbuf_sendfile(struct thread_information *tip,
593 unsigned int maxlen)
594{
11eedd9b 595 struct tip_subbuf *ts;
32f18c48 596 struct stat sb;
7035d92d 597 unsigned int ready, this_size, total;
32f18c48 598
18eed2a7
JA
599 wait_for_data(tip);
600
601 /*
602 * hack to get last data out, we can't use sendfile for that
603 */
604 if (is_done())
605 return get_subbuf(tip, maxlen);
606
32f18c48
JA
607 if (fstat(tip->fd, &sb) < 0) {
608 perror("trace stat");
7035d92d 609 return -1;
32f18c48
JA
610 }
611
11eedd9b 612 ready = sb.st_size - tip->ofile_offset;
663962f7
JA
613 if (!ready) {
614 /*
7035d92d 615 * delay a little, since poll() will return data available
663962f7
JA
616 * until sendfile() is run
617 */
618 usleep(100);
11eedd9b 619 return 0;
663962f7 620 }
11eedd9b 621
1be42f3d 622 this_size = buf_size;
7035d92d 623 total = ready;
11eedd9b 624 while (ready) {
1be42f3d
JA
625 if (this_size > ready)
626 this_size = ready;
627
11eedd9b
JA
628 ts = malloc(sizeof(*ts));
629
11eedd9b 630 ts->buf = NULL;
7035d92d 631 ts->max_len = 0;
11eedd9b 632
1be42f3d 633 ts->len = this_size;
11eedd9b
JA
634 ts->offset = tip->ofile_offset;
635 tip->ofile_offset += ts->len;
636
7035d92d
JA
637 if (subbuf_fifo_queue(tip, ts))
638 return -1;
11eedd9b 639
1be42f3d 640 ready -= this_size;
11eedd9b
JA
641 }
642
7035d92d 643 return total;
32f18c48
JA
644}
645
9db17354 646static void close_thread(struct thread_information *tip)
a3e4d330 647{
9db17354
JA
648 if (tip->fd != -1)
649 close(tip->fd);
11eedd9b
JA
650 if (tip->pfd != -1)
651 close(tip->pfd);
9db17354
JA
652 if (tip->ofile)
653 fclose(tip->ofile);
654 if (tip->ofile_buffer)
655 free(tip->ofile_buffer);
656 if (tip->fd_buf)
657 free(tip->fd_buf);
11eedd9b
JA
658 if (tip->pfd_buf)
659 free(tip->pfd_buf);
1c99bc21 660
9db17354 661 tip->fd = -1;
11eedd9b 662 tip->pfd = -1;
9db17354
JA
663 tip->ofile = NULL;
664 tip->ofile_buffer = NULL;
665 tip->fd_buf = NULL;
a3e4d330
JA
666}
667
8e86c98a
JA
668static void tip_ftrunc_final(struct thread_information *tip)
669{
670 /*
671 * truncate to right size and cleanup mmap
672 */
673 if (tip->ofile_mmap) {
674 int ofd = fileno(tip->ofile);
675
676 if (tip->fs_buf)
677 munmap(tip->fs_buf, tip->fs_buf_len);
678
679 ftruncate(ofd, tip->fs_size);
680 }
681}
682
9db17354 683static void *thread_main(void *arg)
a3e4d330 684{
9db17354
JA
685 struct thread_information *tip = arg;
686 pid_t pid = getpid();
687 cpu_set_t cpu_mask;
a3e4d330 688
9db17354
JA
689 CPU_ZERO(&cpu_mask);
690 CPU_SET((tip->cpu), &cpu_mask);
a3e4d330 691
9db17354
JA
692 if (sched_setaffinity(pid, sizeof(cpu_mask), &cpu_mask) == -1) {
693 perror("sched_setaffinity");
694 exit_trace(1);
695 }
a3e4d330 696
9db17354
JA
697 snprintf(tip->fn, sizeof(tip->fn), "%s/block/%s/trace%d",
698 relay_path, tip->device->buts_name, tip->cpu);
699 tip->fd = open(tip->fn, O_RDONLY);
700 if (tip->fd < 0) {
701 perror(tip->fn);
702 fprintf(stderr,"Thread %d failed open of %s\n", tip->cpu,
703 tip->fn);
704 exit_trace(1);
a3e4d330
JA
705 }
706
11eedd9b
JA
707 if (net_mode == Net_client && net_sendfile) {
708 char tmp[MAXPATHLEN + 64];
709
710 snprintf(tmp, sizeof(tmp), "%s/block/%s/trace%d.padding",
711 relay_path, tip->device->buts_name, tip->cpu);
712
713 tip->pfd = open(tmp, O_RDONLY);
714 if (tip->pfd < 0) {
715 fprintf(stderr, "Couldn't open padding file %s\n", tmp);
716 exit_trace(1);
717 }
718
719 tip->pfd_buf = malloc(buf_nr * sizeof(size_t));
720 }
721
b7106311 722 while (!is_done()) {
7035d92d 723 if (tip->get_subbuf(tip, buf_size) < 0)
0cc7d25e 724 break;
b7106311
JA
725 }
726
7035d92d
JA
727 /*
728 * trace is stopped, pull data until we get a short read
729 */
730 while (tip->get_subbuf(tip, buf_size) > 0)
731 ;
732
8e86c98a
JA
733 tip_ftrunc_final(tip);
734 tip->exited = 1;
735 return NULL;
736}
b7106311 737
8e86c98a
JA
/*
 * Send all @buf_len bytes of @buf over socket @fd, calling send() again
 * after partial sends. Returns 0 on success, 1 if send() failed.
 */
static int write_data_net(int fd, void *buf, unsigned int buf_len)
{
	unsigned int remaining;
	int sent;

	for (remaining = buf_len; remaining; remaining -= sent) {
		sent = send(fd, buf, remaining, 0);
		if (sent < 0) {
			perror("send");
			return 1;
		}

		buf += sent;
	}

	return 0;
}
756
32f18c48 757static int net_send_header(struct thread_information *tip, unsigned int len)
8e86c98a
JA
758{
759 struct blktrace_net_hdr hdr;
8e86c98a
JA
760
761 hdr.magic = BLK_IO_TRACE_MAGIC;
22cd0c02 762 strcpy(hdr.buts_name, tip->device->buts_name);
8e86c98a 763 hdr.cpu = tip->cpu;
22cd0c02 764 hdr.max_cpus = ncpus;
32f18c48 765 hdr.len = len;
8e86c98a 766
32f18c48
JA
767 return write_data_net(net_out_fd, &hdr, sizeof(hdr));
768}
8e86c98a 769
6a752c90
JA
770/*
771 * send header with 0 length to signal end-of-run
772 */
773static void net_client_send_close(void)
774{
775 struct blktrace_net_hdr hdr;
776
777 hdr.magic = BLK_IO_TRACE_MAGIC;
778 hdr.cpu = 0;
779 hdr.max_cpus = ncpus;
780 hdr.len = 0;
781
782 write_data_net(net_out_fd, &hdr, sizeof(hdr));
783}
784
32f18c48
JA
785static int flush_subbuf_net(struct thread_information *tip,
786 struct tip_subbuf *ts)
787{
788 if (net_send_header(tip, ts->len))
789 return 1;
22cd0c02
JA
790 if (write_data_net(net_out_fd, ts->buf, ts->len))
791 return 1;
8e86c98a 792
f0597a7e 793 free(ts->buf);
8e86c98a
JA
794 free(ts);
795 return 0;
796}
797
32f18c48
JA
798static int flush_subbuf_sendfile(struct thread_information *tip,
799 struct tip_subbuf *ts)
800{
11eedd9b
JA
801 size_t padding;
802 unsigned subbuf;
803 unsigned len;
18eed2a7
JA
804
805 /*
806 * currently we cannot use sendfile() on the last bytes read, as they
807 * may not be a full subbuffer. get_subbuf_sendfile() falls back to
808 * the read approach for those, so use send() to ship them out
809 */
810 if (ts->buf)
811 return flush_subbuf_net(tip, ts);
11eedd9b
JA
812
813 subbuf = (ts->offset / buf_size) % buf_nr;
814 padding = get_subbuf_padding(tip, subbuf);
815 len = ts->len - padding;
816
817 if (net_send_header(tip, len))
32f18c48 818 return 1;
11eedd9b 819 if (sendfile(net_out_fd, tip->fd, &ts->offset, len) < 0) {
32f18c48
JA
820 perror("sendfile");
821 return 1;
822 }
823
112a166e 824 tip->data_read += len;
32f18c48
JA
825 free(ts);
826 return 0;
827}
828
8e86c98a
JA
829static int write_data(struct thread_information *tip, void *buf,
830 unsigned int buf_len)
8a43bac5 831{
7126171a 832 int ret;
8a43bac5 833
6480258a
JA
834 if (!buf_len)
835 return 0;
836
7126171a
JA
837 while (1) {
838 ret = fwrite(buf, buf_len, 1, tip->ofile);
007c233c 839 if (ret == 1)
8a43bac5
JA
840 break;
841
db6fe5bc
JA
842 if (ret < 0) {
843 perror("write");
844 return 1;
8a43bac5 845 }
d0ca268b
JA
846 }
847
9db17354 848 if (tip->ofile_stdout)
7126171a
JA
849 fflush(tip->ofile);
850
8a43bac5
JA
851 return 0;
852}
853
8e86c98a
JA
854static int flush_subbuf_file(struct thread_information *tip,
855 struct tip_subbuf *ts)
8a43bac5 856{
9db17354
JA
857 unsigned int offset = 0;
858 struct blk_io_trace *t;
859 int pdu_len, events = 0;
8a43bac5 860
9db17354 861 /*
7de86b12 862 * surplus from last run
9db17354 863 */
7de86b12
AB
864 if (tip->leftover_ts) {
865 struct tip_subbuf *prev_ts = tip->leftover_ts;
866
9e8b753c 867 if (prev_ts->len + ts->len > prev_ts->max_len) {
7de86b12
AB
868 prev_ts->max_len += ts->len;
869 prev_ts->buf = realloc(prev_ts->buf, prev_ts->max_len);
870 }
871
9e8b753c 872 memcpy(prev_ts->buf + prev_ts->len, ts->buf, ts->len);
7de86b12
AB
873 prev_ts->len += ts->len;
874
875 free(ts->buf);
876 free(ts);
877
878 ts = prev_ts;
879 tip->leftover_ts = NULL;
9db17354 880 }
d0ca268b 881
9db17354
JA
882 while (offset + sizeof(*t) <= ts->len) {
883 t = ts->buf + offset;
3a9d6c13 884
9cfa6c2b
AB
885 if (verify_trace(t)) {
886 write_data(tip, ts->buf, offset);
9db17354 887 return -1;
9cfa6c2b 888 }
3a9d6c13 889
9db17354 890 pdu_len = t->pdu_len;
3a9d6c13 891
9db17354 892 if (offset + sizeof(*t) + pdu_len > ts->len)
3a9d6c13 893 break;
4b5db44a 894
9db17354
JA
895 offset += sizeof(*t) + pdu_len;
896 tip->events_processed++;
b7106311 897 tip->data_read += sizeof(*t) + pdu_len;
9db17354 898 events++;
3a9d6c13
JA
899 }
900
9cfa6c2b
AB
901 if (write_data(tip, ts->buf, offset))
902 return -1;
903
3a9d6c13 904 /*
9db17354 905 * leftover bytes, save them for next time
3a9d6c13 906 */
9db17354 907 if (offset != ts->len) {
7de86b12 908 tip->leftover_ts = ts;
9e8b753c
JA
909 ts->len -= offset;
910 memmove(ts->buf, ts->buf + offset, ts->len);
7de86b12
AB
911 } else {
912 free(ts->buf);
913 free(ts);
9db17354 914 }
4b5db44a 915
9db17354 916 return events;
4b5db44a
JA
917}
918
9db17354 919static int write_tip_events(struct thread_information *tip)
d5396421 920{
21f55651 921 struct tip_subbuf *ts = subbuf_fifo_dequeue(tip);
d5396421 922
0cc7d25e
JA
923 if (ts)
924 return tip->flush_subbuf(tip, ts);
91816d54 925
9db17354 926 return 0;
91816d54
JA
927}
928
9db17354
JA
929/*
930 * scans the tips we know and writes out the subbuffers we accumulate
931 */
932static void get_and_write_events(void)
d0ca268b 933{
9db17354
JA
934 struct device_information *dip;
935 struct thread_information *tip;
27223f19 936 int i, j, events, ret, tips_running;
d0ca268b 937
9db17354
JA
938 while (!is_done()) {
939 events = 0;
d0ca268b 940
9db17354
JA
941 for_each_dip(dip, i) {
942 for_each_tip(dip, tip, j) {
943 ret = write_tip_events(tip);
944 if (ret > 0)
945 events += ret;
946 }
947 }
d0ca268b 948
9db17354
JA
949 if (!events)
950 usleep(10);
d0ca268b
JA
951 }
952
a3e4d330 953 /*
9db17354 954 * reap stored events
a3e4d330 955 */
9db17354
JA
956 do {
957 events = 0;
27223f19 958 tips_running = 0;
9db17354
JA
959 for_each_dip(dip, i) {
960 for_each_tip(dip, tip, j) {
961 ret = write_tip_events(tip);
962 if (ret > 0)
963 events += ret;
27223f19 964 tips_running += !tip->exited;
9db17354 965 }
69e65a9e 966 }
9db17354 967 usleep(10);
27223f19 968 } while (events || tips_running);
d0ca268b
JA
969}
970
b7106311
JA
971static void wait_for_threads(void)
972{
973 /*
8e86c98a
JA
974 * for piped or network output, poll and fetch data for writeout.
975 * for files, we just wait around for trace threads to exit
b7106311 976 */
8e86c98a
JA
977 if ((output_name && !strcmp(output_name, "-")) ||
978 net_mode == Net_client)
b7106311
JA
979 get_and_write_events();
980 else {
981 struct device_information *dip;
982 struct thread_information *tip;
983 int i, j, tips_running;
984
985 do {
986 tips_running = 0;
987 usleep(1000);
988
989 for_each_dip(dip, i)
990 for_each_tip(dip, tip, j)
991 tips_running += !tip->exited;
992 } while (tips_running);
993 }
6a752c90
JA
994
995 if (net_mode == Net_client)
996 net_client_send_close();
b7106311
JA
997}
998
e3bf54d8
JA
999static int fill_ofname(struct thread_information *tip, char *dst,
1000 char *buts_name)
8e86c98a 1001{
e3bf54d8 1002 struct stat sb;
8e86c98a 1003 int len = 0;
e3bf54d8 1004 time_t t;
8e86c98a
JA
1005
1006 if (output_dir)
1007 len = sprintf(dst, "%s/", output_dir);
1008
e3bf54d8
JA
1009 if (net_mode == Net_server) {
1010 len += sprintf(dst + len, "%s-", inet_ntoa(tip->cl_in_addr));
1011 time(&t);
1012 len += strftime(dst + len, 64, "%F-%T/", gmtime(&t));
1013 }
1014
1015 if (stat(dst, &sb) < 0) {
1016 if (errno != ENOENT) {
1017 perror("stat");
1018 return 1;
1019 }
1020 if (mkdir(dst, 0755) < 0) {
1021 perror(dst);
1022 fprintf(stderr, "Can't make output dir\n");
1023 return 1;
1024 }
1025 }
1026
8e86c98a 1027 if (output_name)
e3bf54d8 1028 sprintf(dst + len, "%s.blktrace.%d", output_name, tip->cpu);
8e86c98a 1029 else
e3bf54d8
JA
1030 sprintf(dst + len, "%s.blktrace.%d", buts_name, tip->cpu);
1031
1032 return 0;
8e86c98a
JA
1033}
1034
0cc7d25e
JA
1035static void fill_ops(struct thread_information *tip)
1036{
1037 /*
1038 * setup ops
1039 */
32f18c48
JA
1040 if (net_mode == Net_client) {
1041 if (net_sendfile) {
1042 tip->get_subbuf = get_subbuf_sendfile;
1043 tip->flush_subbuf = flush_subbuf_sendfile;
1044 } else {
1045 tip->get_subbuf = get_subbuf;
1046 tip->flush_subbuf = flush_subbuf_net;
1047 }
1048 } else {
1049 if (tip->ofile_mmap)
1050 tip->get_subbuf = mmap_subbuf;
1051 else
1052 tip->get_subbuf = get_subbuf;
0cc7d25e 1053
0cc7d25e 1054 tip->flush_subbuf = flush_subbuf_file;
32f18c48
JA
1055 }
1056
0cc7d25e
JA
1057 if (net_mode == Net_server)
1058 tip->read_data = read_data_net;
1059 else
1060 tip->read_data = read_data_file;
1061}
1062
ddf22842
JA
1063static int tip_open_output(struct device_information *dip,
1064 struct thread_information *tip)
d0ca268b 1065{
ddf22842 1066 int pipeline = output_name && !strcmp(output_name, "-");
8e86c98a 1067 int mode, vbuf_size;
e3bf54d8 1068 char op[128];
d0ca268b 1069
ddf22842
JA
1070 if (net_mode == Net_client) {
1071 tip->ofile = NULL;
1072 tip->ofile_stdout = 0;
1073 tip->ofile_mmap = 0;
0c0b75b4 1074 goto done;
ddf22842
JA
1075 } else if (pipeline) {
1076 tip->ofile = fdopen(STDOUT_FILENO, "w");
1077 tip->ofile_stdout = 1;
1078 tip->ofile_mmap = 0;
1079 mode = _IOLBF;
1080 vbuf_size = 512;
1081 } else {
e3bf54d8
JA
1082 if (fill_ofname(tip, op, dip->buts_name))
1083 return 1;
ddf22842
JA
1084 tip->ofile = fopen(op, "w+");
1085 tip->ofile_stdout = 0;
1086 tip->ofile_mmap = 1;
1087 mode = _IOFBF;
1088 vbuf_size = OFILE_BUF;
1089 }
d5396421 1090
0c0b75b4 1091 if (tip->ofile == NULL) {
ddf22842
JA
1092 perror(op);
1093 return 1;
1094 }
d5396421 1095
0c0b75b4
JA
1096 tip->ofile_buffer = malloc(vbuf_size);
1097 if (setvbuf(tip->ofile, tip->ofile_buffer, mode, vbuf_size)) {
1098 perror("setvbuf");
1099 close_thread(tip);
1100 return 1;
ddf22842
JA
1101 }
1102
0c0b75b4 1103done:
ddf22842
JA
1104 fill_ops(tip);
1105 return 0;
1106}
007c233c 1107
ddf22842
JA
1108static int start_threads(struct device_information *dip)
1109{
1110 struct thread_information *tip;
1111 int j;
1112
1113 for_each_tip(dip, tip, j) {
1114 tip->cpu = j;
1115 tip->device = dip;
1116 tip->events_processed = 0;
11eedd9b
JA
1117 tip->fd = -1;
1118 tip->pfd = -1;
ddf22842
JA
1119 memset(&tip->fifo, 0, sizeof(tip->fifo));
1120 tip->leftover_ts = NULL;
1121
1122 if (tip_open_output(dip, tip))
1123 return 1;
0cc7d25e 1124
9db17354 1125 if (pthread_create(&tip->thread, NULL, thread_main, tip)) {
e7c9f3ff 1126 perror("pthread_create");
007c233c 1127 close_thread(tip);
e7c9f3ff 1128 return 1;
d0ca268b
JA
1129 }
1130 }
1131
e7c9f3ff 1132 return 0;
d0ca268b
JA
1133}
1134
e7c9f3ff 1135static void stop_threads(struct device_information *dip)
3aabcd89 1136{
e7c9f3ff 1137 struct thread_information *tip;
91816d54 1138 unsigned long ret;
007c233c
JA
1139 int i;
1140
9db17354 1141 for_each_tip(dip, tip, i) {
91816d54 1142 (void) pthread_join(tip->thread, (void *) &ret);
9db17354
JA
1143 close_thread(tip);
1144 }
3aabcd89
JA
1145}
1146
e7c9f3ff 1147static void stop_all_threads(void)
72ca8801 1148{
e7c9f3ff 1149 struct device_information *dip;
72ca8801
NS
1150 int i;
1151
99c1f5ab 1152 for_each_dip(dip, i)
e7c9f3ff
NS
1153 stop_threads(dip);
1154}
1155
1156static void stop_all_tracing(void)
1157{
1158 struct device_information *dip;
91816d54 1159 int i;
007c233c 1160
91816d54 1161 for_each_dip(dip, i)
e7c9f3ff 1162 stop_trace(dip);
72ca8801
NS
1163}
1164
1165static void exit_trace(int status)
1166{
eb3c8108
JA
1167 if (!is_trace_stopped()) {
1168 trace_stopped = 1;
1169 stop_all_threads();
1170 stop_all_tracing();
1171 }
1172
72ca8801
NS
1173 exit(status);
1174}
1175
e7c9f3ff
NS
1176static int resize_devices(char *path)
1177{
1178 int size = (ndevs + 1) * sizeof(struct device_information);
1179
1180 device_information = realloc(device_information, size);
1181 if (!device_information) {
1182 fprintf(stderr, "Out of memory, device %s (%d)\n", path, size);
1183 return 1;
1184 }
1185 device_information[ndevs].path = path;
1186 ndevs++;
1187 return 0;
1188}
1189
1190static int open_devices(void)
d0ca268b 1191{
e7c9f3ff 1192 struct device_information *dip;
d0ca268b 1193 int i;
d0ca268b 1194
99c1f5ab 1195 for_each_dip(dip, i) {
cf9208ea 1196 dip->fd = open(dip->path, O_RDONLY | O_NONBLOCK);
e7c9f3ff
NS
1197 if (dip->fd < 0) {
1198 perror(dip->path);
1199 return 1;
1200 }
1201 }
99c1f5ab 1202
e7c9f3ff
NS
1203 return 0;
1204}
1205
1206static int start_devices(void)
1207{
1208 struct device_information *dip;
1209 int i, j, size;
1210
1211 size = ncpus * sizeof(struct thread_information);
1212 thread_information = malloc(size * ndevs);
1213 if (!thread_information) {
1214 fprintf(stderr, "Out of memory, threads (%d)\n", size * ndevs);
1215 return 1;
1216 }
d5396421 1217
99c1f5ab 1218 for_each_dip(dip, i) {
e7c9f3ff
NS
1219 if (start_trace(dip)) {
1220 close(dip->fd);
1221 fprintf(stderr, "Failed to start trace on %s\n",
1222 dip->path);
1223 break;
1224 }
1225 }
99c1f5ab 1226
e7c9f3ff 1227 if (i != ndevs) {
99c1f5ab 1228 __for_each_dip(dip, j, i)
e7c9f3ff 1229 stop_trace(dip);
99c1f5ab 1230
e7c9f3ff
NS
1231 return 1;
1232 }
1233
99c1f5ab 1234 for_each_dip(dip, i) {
e7c9f3ff
NS
1235 dip->threads = thread_information + (i * ncpus);
1236 if (start_threads(dip)) {
1237 fprintf(stderr, "Failed to start worker threads\n");
1238 break;
1239 }
1240 }
99c1f5ab 1241
e7c9f3ff 1242 if (i != ndevs) {
99c1f5ab 1243 __for_each_dip(dip, j, i)
e7c9f3ff 1244 stop_threads(dip);
99c1f5ab 1245 for_each_dip(dip, i)
e7c9f3ff 1246 stop_trace(dip);
99c1f5ab 1247
e7c9f3ff 1248 return 1;
d0ca268b
JA
1249 }
1250
e7c9f3ff 1251 return 0;
d0ca268b
JA
1252}
1253
e7c9f3ff
NS
1254static void show_stats(void)
1255{
e7c9f3ff
NS
1256 struct device_information *dip;
1257 struct thread_information *tip;
b7106311 1258 unsigned long long events_processed, data_read;
eb3c8108 1259 unsigned long total_drops;
2f903295 1260 int i, j, no_stdout = 0;
eb3c8108
JA
1261
1262 if (is_stat_shown())
1263 return;
1264
2f903295
JA
1265 if (output_name && !strcmp(output_name, "-"))
1266 no_stdout = 1;
1267
eb3c8108 1268 stat_shown = 1;
428683db 1269
56070ea4 1270 total_drops = 0;
99c1f5ab 1271 for_each_dip(dip, i) {
2f903295 1272 if (!no_stdout)
56070ea4 1273 printf("Device: %s\n", dip->path);
e7c9f3ff 1274 events_processed = 0;
b7106311 1275 data_read = 0;
99c1f5ab 1276 for_each_tip(dip, tip, j) {
2f903295 1277 if (!no_stdout)
b7106311
JA
1278 printf(" CPU%3d: %20lu events, %8llu KiB data\n",
1279 tip->cpu, tip->events_processed,
54824c20 1280 (tip->data_read + 1023) >> 10);
e7c9f3ff 1281 events_processed += tip->events_processed;
b7106311 1282 data_read += tip->data_read;
e7c9f3ff 1283 }
eb3c8108 1284 total_drops += dip->drop_count;
2f903295 1285 if (!no_stdout)
b7106311
JA
1286 printf(" Total: %20llu events (dropped %lu), %8llu KiB data\n",
1287 events_processed, dip->drop_count,
18d8437d 1288 (data_read + 1023) >> 10);
e7c9f3ff 1289 }
56070ea4
JA
1290
1291 if (total_drops)
1292 fprintf(stderr, "You have dropped events, consider using a larger buffer size (-b)\n");
e7c9f3ff 1293}
52724a0e 1294
e3bf54d8
JA
1295static struct device_information *net_get_dip(char *buts_name,
1296 struct in_addr *cl_in_addr)
8e86c98a 1297{
22cd0c02 1298 struct device_information *dip;
8e86c98a
JA
1299 int i;
1300
22cd0c02
JA
1301 for (i = 0; i < ndevs; i++) {
1302 dip = &device_information[i];
8e86c98a 1303
22cd0c02
JA
1304 if (!strcmp(dip->buts_name, buts_name))
1305 return dip;
8e86c98a
JA
1306 }
1307
22cd0c02
JA
1308 device_information = realloc(device_information, (ndevs + 1) * sizeof(*dip));
1309 dip = &device_information[ndevs];
921b05fe
JA
1310 memset(dip, 0, sizeof(*dip));
1311 dip->fd = -1;
22cd0c02 1312 strcpy(dip->buts_name, buts_name);
921b05fe 1313 dip->path = strdup(buts_name);
22cd0c02
JA
1314 ndevs++;
1315 dip->threads = malloc(ncpus * sizeof(struct thread_information));
1316 memset(dip->threads, 0, ncpus * sizeof(struct thread_information));
1317
1318 /*
1319 * open all files
1320 */
1321 for (i = 0; i < ncpus; i++) {
1322 struct thread_information *tip = &dip->threads[i];
8e86c98a 1323
22cd0c02 1324 tip->cpu = i;
22cd0c02 1325 tip->device = dip;
1366e53a
JA
1326 tip->fd = -1;
1327 tip->pfd = -1;
e3bf54d8 1328 tip->cl_in_addr = *cl_in_addr;
8e86c98a 1329
ddf22842 1330 if (tip_open_output(dip, tip))
22cd0c02 1331 return NULL;
8e86c98a
JA
1332 }
1333
22cd0c02
JA
1334 return dip;
1335}
1336
e3bf54d8
JA
1337static struct thread_information *net_get_tip(struct blktrace_net_hdr *bnh,
1338 struct in_addr *cl_in_addr)
22cd0c02
JA
1339{
1340 struct device_information *dip;
1341
1342 ncpus = bnh->max_cpus;
e3bf54d8 1343 dip = net_get_dip(bnh->buts_name, cl_in_addr);
22cd0c02 1344 return &dip->threads[bnh->cpu];
8e86c98a
JA
1345}
1346
1347static int net_get_header(struct blktrace_net_hdr *bnh)
1348{
1349 int fl = fcntl(net_in_fd, F_GETFL);
1350 int bytes_left, ret;
1351 void *p = bnh;
1352
1353 fcntl(net_in_fd, F_SETFL, fl | O_NONBLOCK);
1354 bytes_left = sizeof(*bnh);
1355 while (bytes_left && !is_done()) {
1356 ret = recv(net_in_fd, p, bytes_left, MSG_WAITALL);
1357 if (ret < 0) {
1358 if (errno != EAGAIN) {
1359 perror("recv header");
1360 return 1;
1361 }
1362 usleep(100);
1363 continue;
1364 } else if (!ret) {
1365 usleep(100);
1366 continue;
1367 } else {
1368 p += ret;
1369 bytes_left -= ret;
1370 }
1371 }
1372 fcntl(net_in_fd, F_SETFL, fl & ~O_NONBLOCK);
1373 return 0;
1374}
1375
e3bf54d8 1376static int net_server_loop(struct in_addr *cl_in_addr)
8e86c98a
JA
1377{
1378 struct thread_information *tip;
1379 struct blktrace_net_hdr bnh;
1380
1381 if (net_get_header(&bnh))
1382 return 1;
1383
1384 if (data_is_native == -1 && check_data_endianness(bnh.magic)) {
1385 fprintf(stderr, "server: received data is bad\n");
1386 return 1;
1387 }
1388
1389 if (!data_is_native) {
1390 bnh.cpu = be32_to_cpu(bnh.cpu);
1391 bnh.len = be32_to_cpu(bnh.len);
1392 }
1393
6a752c90
JA
1394 /*
1395 * len == 0 means that the other end signalled end-of-run
1396 */
1397 if (!bnh.len) {
1398 fprintf(stderr, "server: end of run\n");
1399 return 1;
1400 }
1401
e3bf54d8 1402 tip = net_get_tip(&bnh, cl_in_addr);
8e86c98a
JA
1403 if (!tip)
1404 return 1;
1405
1406 if (mmap_subbuf(tip, bnh.len))
1407 return 1;
1408
1409 return 0;
1410}
1411
1412/*
1413 * Start here when we are in server mode - just fetch data from the network
1414 * and dump to files
1415 */
1416static int net_server(void)
1417{
898bbd3b
JA
1418 struct device_information *dip;
1419 struct thread_information *tip;
8e86c98a
JA
1420 struct sockaddr_in addr;
1421 socklen_t socklen;
22cd0c02 1422 int fd, opt, i, j;
8e86c98a
JA
1423
1424 fd = socket(AF_INET, SOCK_STREAM, 0);
1425 if (fd < 0) {
1426 perror("server: socket");
1427 return 1;
1428 }
1429
1430 opt = 1;
1431 if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) < 0) {
1432 perror("setsockopt");
1433 return 1;
1434 }
1435
1436 memset(&addr, 0, sizeof(addr));
1437 addr.sin_family = AF_INET;
1438 addr.sin_addr.s_addr = htonl(INADDR_ANY);
1439 addr.sin_port = htons(net_port);
1440
1441 if (bind(fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
1442 perror("bind");
1443 return 1;
1444 }
1445
1446 if (listen(fd, 1) < 0) {
1447 perror("listen");
1448 return 1;
1449 }
1450
6a752c90 1451repeat:
bbb1c18a
JA
1452 signal(SIGINT, NULL);
1453 signal(SIGHUP, NULL);
1454 signal(SIGTERM, NULL);
1455 signal(SIGALRM, NULL);
1456
8e86c98a
JA
1457 printf("blktrace: waiting for incoming connection...\n");
1458
1459 socklen = sizeof(addr);
1460 net_in_fd = accept(fd, (struct sockaddr *) &addr, &socklen);
1461 if (net_in_fd < 0) {
1462 perror("accept");
1463 return 1;
1464 }
1465
1466 signal(SIGINT, handle_sigint);
1467 signal(SIGHUP, handle_sigint);
1468 signal(SIGTERM, handle_sigint);
1469 signal(SIGALRM, handle_sigint);
1470
e85ee0f2 1471 printf("blktrace: connection from %s\n", inet_ntoa(addr.sin_addr));
8e86c98a
JA
1472
1473 while (!is_done()) {
e3bf54d8 1474 if (net_server_loop(&addr.sin_addr))
8e86c98a
JA
1475 break;
1476 }
1477
898bbd3b
JA
1478 for_each_dip(dip, i)
1479 for_each_tip(dip, tip, j)
1480 tip_ftrunc_final(tip);
8e86c98a 1481
410d7c62 1482 show_stats();
6a752c90
JA
1483
1484 if (is_done())
1485 return 0;
1486
898bbd3b
JA
1487 /*
1488 * cleanup for next run
1489 */
1490 for_each_dip(dip, i) {
1491 for_each_tip(dip, tip, j)
1492 fclose(tip->ofile);
1493
1494 free(dip->threads);
921b05fe 1495 free(dip->path);
898bbd3b
JA
1496 }
1497
1498 free(device_information);
1499 device_information = NULL;
1500 ncpus = ndevs = 0;
b46a0342
JA
1501
1502 close(net_in_fd);
1503 net_in_fd = -1;
dbfbd6db 1504 stat_shown = 0;
6a752c90 1505 goto repeat;
8e86c98a
JA
1506}
1507
1508/*
1509 * Setup outgoing network connection where we will transmit data
1510 */
1511static int net_setup_client(void)
1512{
1513 struct sockaddr_in addr;
1514 int fd;
1515
1516 fd = socket(AF_INET, SOCK_STREAM, 0);
1517 if (fd < 0) {
1518 perror("client: socket");
1519 return 1;
1520 }
1521
1522 memset(&addr, 0, sizeof(addr));
1523 addr.sin_family = AF_INET;
1524 addr.sin_port = htons(net_port);
1525
1526 if (inet_aton(hostname, &addr.sin_addr) != 1) {
1527 struct hostent *hent = gethostbyname(hostname);
1528 if (!hent) {
1529 perror("gethostbyname");
1530 return 1;
1531 }
1532
1533 memcpy(&addr.sin_addr, hent->h_addr, 4);
1534 strcpy(hostname, hent->h_name);
1535 }
1536
1537 printf("blktrace: connecting to %s\n", hostname);
1538
1539 if (connect(fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
1540 perror("client: connect");
1541 return 1;
1542 }
1543
1544 printf("blktrace: connected!\n");
1545 net_out_fd = fd;
1546 return 0;
1547}
1548
52724a0e
JA
/*
 * Option summary printed by show_usage(), following the program name and
 * version.
 */
static char usage_str[] =
	"-d <dev> [ -r relay path ] [ -o <output> ] [-k ] [ -w time ]\n"
	"[ -a action ] [ -A action mask ] [ -v ]\n\n"
	"\t-d Use specified device. May also be given last after options\n"
	"\t-r Path to mounted relayfs, defaults to /relay\n"
	"\t-o File(s) to send output to\n"
	"\t-D Directory to prepend to output file names\n"
	"\t-k Kill a running trace\n"
	"\t-w Stop after defined time, in seconds\n"
	"\t-a Only trace specified actions. See documentation\n"
	"\t-A Give trace mask as a single value. See documentation\n"
	"\t-b Sub buffer size in KiB\n"
	"\t-n Number of sub buffers\n"
	"\t-l Run in network listen mode (blktrace server)\n"
	"\t-h Run in network client mode, connecting to the given host\n"
	"\t-p Network port to use (default 8462)\n"
	"\t-s Make the network client use sendfile() to transfer data\n"
	"\t-V Print program version info\n\n";
52724a0e 1567
ee1f4158
NS
1568static void show_usage(char *program)
1569{
52724a0e 1570 fprintf(stderr, "Usage: %s %s %s",program, blktrace_version, usage_str);
ee1f4158 1571}
d0ca268b
JA
1572
1573int main(int argc, char *argv[])
1574{
5270dddd 1575 static char default_relay_path[] = "/relay";
e3e74029 1576 struct statfs st;
d39c04ca 1577 int i, c;
ece238a6 1578 int stop_watch = 0;
d39c04ca
AB
1579 int act_mask_tmp = 0;
1580
1581 while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) >= 0) {
1582 switch (c) {
1583 case 'a':
1584 i = find_mask_map(optarg);
1585 if (i < 0) {
ab197ca7 1586 fprintf(stderr,"Invalid action mask %s\n",
d39c04ca 1587 optarg);
7425d456 1588 return 1;
d39c04ca
AB
1589 }
1590 act_mask_tmp |= i;
1591 break;
1592
1593 case 'A':
98f8386b
AB
1594 if ((sscanf(optarg, "%x", &i) != 1) ||
1595 !valid_act_opt(i)) {
d39c04ca 1596 fprintf(stderr,
ab197ca7 1597 "Invalid set action mask %s/0x%x\n",
d39c04ca 1598 optarg, i);
7425d456 1599 return 1;
d39c04ca
AB
1600 }
1601 act_mask_tmp = i;
1602 break;
d0ca268b 1603
d39c04ca 1604 case 'd':
e7c9f3ff
NS
1605 if (resize_devices(optarg) != 0)
1606 return 1;
d39c04ca
AB
1607 break;
1608
5270dddd
JA
1609 case 'r':
1610 relay_path = optarg;
1611 break;
1612
d5396421 1613 case 'o':
66efebf8 1614 output_name = optarg;
d5396421 1615 break;
bc39777c
JA
1616 case 'k':
1617 kill_running_trace = 1;
1618 break;
ece238a6
NS
1619 case 'w':
1620 stop_watch = atoi(optarg);
1621 if (stop_watch <= 0) {
1622 fprintf(stderr,
1623 "Invalid stopwatch value (%d secs)\n",
1624 stop_watch);
1625 return 1;
1626 }
1627 break;
57ea8602 1628 case 'V':
52724a0e
JA
1629 printf("%s version %s\n", argv[0], blktrace_version);
1630 return 0;
129aa440 1631 case 'b':
eb3c8108 1632 buf_size = strtoul(optarg, NULL, 10);
183a0855 1633 if (buf_size <= 0 || buf_size > 16*1024) {
129aa440 1634 fprintf(stderr,
eb3c8108 1635 "Invalid buffer size (%lu)\n",buf_size);
129aa440
JA
1636 return 1;
1637 }
1638 buf_size <<= 10;
1639 break;
1640 case 'n':
eb3c8108 1641 buf_nr = strtoul(optarg, NULL, 10);
129aa440
JA
1642 if (buf_nr <= 0) {
1643 fprintf(stderr,
eb3c8108 1644 "Invalid buffer nr (%lu)\n", buf_nr);
129aa440
JA
1645 return 1;
1646 }
1647 break;
d1d7f15f
JA
1648 case 'D':
1649 output_dir = optarg;
1650 break;
8e86c98a
JA
1651 case 'h':
1652 net_mode = Net_client;
1653 strcpy(hostname, optarg);
1654 break;
1655 case 'l':
1656 net_mode = Net_server;
1657 break;
1658 case 'p':
1659 net_port = atoi(optarg);
1660 break;
32f18c48
JA
1661 case 's':
1662 net_sendfile = 1;
1663 break;
d39c04ca 1664 default:
ee1f4158 1665 show_usage(argv[0]);
7425d456 1666 return 1;
d39c04ca
AB
1667 }
1668 }
1669
8e86c98a
JA
1670 setlocale(LC_NUMERIC, "en_US");
1671
1672 page_size = getpagesize();
1673
1674 if (net_mode == Net_server)
1675 return net_server();
1676
22cd0c02
JA
1677 while (optind < argc) {
1678 if (resize_devices(argv[optind++]) != 0)
1679 return 1;
1680 }
1681
e7c9f3ff 1682 if (ndevs == 0) {
ee1f4158 1683 show_usage(argv[0]);
7425d456 1684 return 1;
d39c04ca
AB
1685 }
1686
5270dddd
JA
1687 if (!relay_path)
1688 relay_path = default_relay_path;
1689
d5396421 1690 if (act_mask_tmp != 0)
d39c04ca 1691 act_mask = act_mask_tmp;
d0ca268b 1692
e3e74029
NS
1693 if (statfs(relay_path, &st) < 0) {
1694 perror("statfs");
1695 fprintf(stderr,"%s does not appear to be a valid path\n",
1696 relay_path);
1697 return 1;
64acacae 1698 } else if (st.f_type != (long) RELAYFS_TYPE) {
e3e74029 1699 fprintf(stderr,"%s does not appear to be a relay filesystem\n",
d0ca268b 1700 relay_path);
7425d456 1701 return 1;
d0ca268b
JA
1702 }
1703
e7c9f3ff 1704 if (open_devices() != 0)
7425d456 1705 return 1;
bc39777c
JA
1706
1707 if (kill_running_trace) {
e7c9f3ff 1708 stop_all_traces();
7425d456 1709 return 0;
bc39777c
JA
1710 }
1711
e7c9f3ff
NS
1712 ncpus = sysconf(_SC_NPROCESSORS_ONLN);
1713 if (ncpus < 0) {
1714 fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed\n");
7425d456 1715 return 1;
d0ca268b
JA
1716 }
1717
d0ca268b
JA
1718 signal(SIGINT, handle_sigint);
1719 signal(SIGHUP, handle_sigint);
1720 signal(SIGTERM, handle_sigint);
ece238a6 1721 signal(SIGALRM, handle_sigint);
d0ca268b 1722
8e86c98a
JA
1723 if (net_mode == Net_client && net_setup_client())
1724 return 1;
1725
1726 if (start_devices() != 0)
1727 return 1;
1728
e7c9f3ff 1729 atexit(stop_all_tracing);
830fd65c 1730
ece238a6
NS
1731 if (stop_watch)
1732 alarm(stop_watch);
1733
b7106311 1734 wait_for_threads();
d0ca268b 1735
eb3c8108
JA
1736 if (!is_trace_stopped()) {
1737 trace_stopped = 1;
91816d54
JA
1738 stop_all_threads();
1739 stop_all_traces();
91816d54 1740 }
d0ca268b 1741
eb3c8108
JA
1742 show_stats();
1743
d0ca268b
JA
1744 return 0;
1745}
1746