[PATCH] blktrace: remove padding logic, it's not needed with the new sendfile()
[blktrace.git] / blktrace.c
CommitLineData
d0ca268b
JA
1/*
2 * block queue tracing application
3 *
d956a2cd
JA
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *
d0ca268b
JA
20 */
21#include <pthread.h>
22#include <sys/types.h>
23#include <sys/stat.h>
24#include <unistd.h>
25#include <locale.h>
26#include <signal.h>
27#include <fcntl.h>
28#include <string.h>
29#include <sys/ioctl.h>
b9d4294e 30#include <sys/param.h>
e3e74029 31#include <sys/statfs.h>
eb3c8108 32#include <sys/poll.h>
b7106311 33#include <sys/mman.h>
8e86c98a 34#include <sys/socket.h>
d0ca268b
JA
35#include <stdio.h>
36#include <stdlib.h>
37#include <sched.h>
d39c04ca
AB
38#include <ctype.h>
39#include <getopt.h>
da39451f 40#include <errno.h>
8e86c98a
JA
41#include <netinet/in.h>
42#include <arpa/inet.h>
43#include <netdb.h>
32f18c48 44#include <sys/sendfile.h>
d0ca268b
JA
45
46#include "blktrace.h"
21f55651 47#include "barrier.h"
d0ca268b 48
13d928f0 49static char blktrace_version[] = "0.99";
52724a0e 50
8f551a39
JA
51/*
52 * You may want to increase this even more, if you are logging at a high
53 * rate and see skipped/missed events
54 */
007c233c 55#define BUF_SIZE (512 * 1024)
d0ca268b
JA
56#define BUF_NR (4)
57
007c233c
JA
58#define OFILE_BUF (128 * 1024)
59
e3e74029
NS
60#define RELAYFS_TYPE 0xF0B4A981
61
32f18c48 62#define S_OPTS "d:a:A:r:o:kw:Vb:n:D:lh:p:s"
/*
 * Long-option table. Each row is { name, has_arg, flag, val }; 'val' is the
 * matching short-option character from S_OPTS. The all-zero row terminates
 * the table.
 */
static struct option l_opts[] = {
	{ "dev",		required_argument,	NULL,	'd' },
	{ "act-mask",		required_argument,	NULL,	'a' },
	{ "set-mask",		required_argument,	NULL,	'A' },
	{ "relay",		required_argument,	NULL,	'r' },
	{ "output",		required_argument,	NULL,	'o' },
	{ "kill",		no_argument,		NULL,	'k' },
	{ "stopwatch",		required_argument,	NULL,	'w' },
	{ "version",		no_argument,		NULL,	'V' },
	{ "buffer-size",	required_argument,	NULL,	'b' },
	{ "num-sub-buffers",	required_argument,	NULL,	'n' },
	{ "output-dir",		required_argument,	NULL,	'D' },
	{ "listen",		no_argument,		NULL,	'l' },
	{ "host",		required_argument,	NULL,	'h' },
	{ "port",		required_argument,	NULL,	'p' },
	{ "sendfile",		no_argument,		NULL,	's' },
	{ NULL,			0,			NULL,	0 }
};
158
/*
 * One chunk of trace data handed from a reader thread to the writer side.
 * For sendfile transfers 'buf' is NULL and only 'len' is meaningful.
 */
struct tip_subbuf {
	void *buf;		/* heap buffer holding the data, or NULL */
	unsigned int len;	/* number of valid bytes */
	unsigned int max_len;	/* capacity recorded for 'buf' */
};
164
21f55651
JA
/*
 * Ring of queued subbuffers. The queue/dequeue helpers wrap indices with
 * '& (FIFO_SIZE - 1)', so FIFO_SIZE must stay a power of two.
 */
#define FIFO_SIZE	(1024)	/* should be plenty big! */
#define CL_SIZE		(128)	/* cache line, any bigger? */

struct tip_subbuf_fifo {
	/* tail and head are each aligned to CL_SIZE */
	int tail __attribute__((aligned(CL_SIZE)));	/* next slot to fill */
	int head __attribute__((aligned(CL_SIZE)));	/* next slot to drain */
	struct tip_subbuf *q[FIFO_SIZE];
};
173
d0ca268b
JA
174struct thread_information {
175 int cpu;
176 pthread_t thread;
b9d4294e
JA
177
178 int fd;
a3e4d330 179 void *fd_buf;
b9d4294e
JA
180 char fn[MAXPATHLEN + 64];
181
e3bf54d8
JA
182 struct in_addr cl_in_addr;
183
007c233c
JA
184 FILE *ofile;
185 char *ofile_buffer;
32f18c48 186 off_t ofile_offset;
9db17354 187 int ofile_stdout;
8e86c98a 188 int ofile_mmap;
11629347 189 volatile int sendfile_pending;
007c233c 190
0cc7d25e
JA
191 int (*get_subbuf)(struct thread_information *, unsigned int);
192 int (*flush_subbuf)(struct thread_information *, struct tip_subbuf *);
193 int (*read_data)(struct thread_information *, void *, unsigned int);
194
d0ca268b 195 unsigned long events_processed;
b7106311 196 unsigned long long data_read;
bcbeb60f 197 unsigned long long data_queued;
e7c9f3ff 198 struct device_information *device;
9db17354
JA
199
200 int exited;
201
b7106311
JA
202 /*
203 * piped fifo buffers
204 */
21f55651 205 struct tip_subbuf_fifo fifo;
7de86b12 206 struct tip_subbuf *leftover_ts;
b7106311
JA
207
208 /*
209 * mmap controlled output files
210 */
211 unsigned long long fs_size;
212 unsigned long long fs_max_size;
213 unsigned long fs_off;
214 void *fs_buf;
215 unsigned long fs_buf_len;
d0ca268b
JA
216};
217
e7c9f3ff
NS
/*
 * State kept for each traced block device.
 */
struct device_information {
	int fd;					/* open device node, -1 when closed */
	char *path;				/* device path as given by the user */
	char buts_name[32];			/* trace name copied from BLKTRACESETUP */
	volatile int trace_started;		/* set once BLKTRACESTART succeeded */
	unsigned long drop_count;		/* dropped-event count sampled at stop time */
	struct thread_information *threads;	/* ncpus worker slots for this device */
};
d0ca268b 226
e7c9f3ff 227static int ncpus;
d0ca268b 228static struct thread_information *thread_information;
e7c9f3ff
NS
229static int ndevs;
230static struct device_information *device_information;
231
232/* command line option globals */
233static char *relay_path;
d5396421 234static char *output_name;
d1d7f15f 235static char *output_dir;
5c86134e 236static int act_mask = ~0U;
bc39777c 237static int kill_running_trace;
eb3c8108
JA
238static unsigned long buf_size = BUF_SIZE;
239static unsigned long buf_nr = BUF_NR;
b7106311 240static unsigned int page_size;
d39c04ca 241
e7c9f3ff
NS
242#define is_done() (*(volatile int *)(&done))
243static volatile int done;
244
eb3c8108
JA
245#define is_trace_stopped() (*(volatile int *)(&trace_stopped))
246static volatile int trace_stopped;
247
248#define is_stat_shown() (*(volatile int *)(&stat_shown))
249static volatile int stat_shown;
a3e4d330 250
8e86c98a
JA
251int data_is_native = -1;
252
72ca8801
NS
253static void exit_trace(int status);
254
99c1f5ab
JA
255#define dip_tracing(dip) (*(volatile int *)(&(dip)->trace_started))
256#define dip_set_tracing(dip, v) ((dip)->trace_started = (v))
257
258#define __for_each_dip(__d, __i, __e) \
259 for (__i = 0, __d = device_information; __i < __e; __i++, __d++)
260
261#define for_each_dip(__d, __i) __for_each_dip(__d, __i, ndevs)
9db17354
JA
262#define for_each_tip(__d, __t, __j) \
263 for (__j = 0, __t = (__d)->threads; __j < ncpus; __j++, __t++)
99c1f5ab 264
8e86c98a
JA
265/*
266 * networking stuff follows. we include a magic number so we know whether
267 * to endianness convert or not
268 */
269struct blktrace_net_hdr {
270 u32 magic; /* same as trace magic */
22cd0c02 271 char buts_name[32]; /* trace name */
8e86c98a 272 u32 cpu; /* for which cpu */
22cd0c02 273 u32 max_cpus;
8e86c98a
JA
274 u32 len; /* length of following trace data */
275};
276
/* default TCP port for network tracing */
#define TRACE_NET_PORT		(8462)

/* values for net_mode */
enum {
	Net_none = 0,
	Net_server,
	Net_client,
};

/*
 * network cmd line params
 */
static char hostname[MAXHOSTNAMELEN];
static int net_port = TRACE_NET_PORT;
static int net_mode = 0;
static int net_use_sendfile;

/* network descriptors, -1 while unset */
static int net_in_fd = -1;
static int net_out_fd = -1;
295
296static void handle_sigint(__attribute__((__unused__)) int sig)
297{
7035d92d
JA
298 struct device_information *dip;
299 int i;
300
301 /*
302 * stop trace so we can reap currently produced data
303 */
304 for_each_dip(dip, i) {
921b05fe
JA
305 if (dip->fd == -1)
306 continue;
7035d92d
JA
307 if (ioctl(dip->fd, BLKTRACESTOP) < 0)
308 perror("BLKTRACESTOP");
309 }
310
8e86c98a
JA
311 done = 1;
312}
313
eb3c8108
JA
314static int get_dropped_count(const char *buts_name)
315{
316 int fd;
317 char tmp[MAXPATHLEN + 64];
318
319 snprintf(tmp, sizeof(tmp), "%s/block/%s/dropped",
320 relay_path, buts_name);
321
322 fd = open(tmp, O_RDONLY);
323 if (fd < 0) {
324 /*
325 * this may be ok, if the kernel doesn't support dropped counts
326 */
327 if (errno == ENOENT)
328 return 0;
329
330 fprintf(stderr, "Couldn't open dropped file %s\n", tmp);
331 return -1;
332 }
333
334 if (read(fd, tmp, sizeof(tmp)) < 0) {
335 perror(tmp);
336 close(fd);
337 return -1;
338 }
339
340 close(fd);
341
342 return atoi(tmp);
343}
344
e7c9f3ff 345static int start_trace(struct device_information *dip)
d0ca268b
JA
346{
347 struct blk_user_trace_setup buts;
348
1f79c4a0 349 memset(&buts, 0, sizeof(buts));
129aa440
JA
350 buts.buf_size = buf_size;
351 buts.buf_nr = buf_nr;
d39c04ca 352 buts.act_mask = act_mask;
d0ca268b 353
ed71a31e
JA
354 if (ioctl(dip->fd, BLKTRACESETUP, &buts) < 0) {
355 perror("BLKTRACESETUP");
356 return 1;
357 }
358
359 if (ioctl(dip->fd, BLKTRACESTART) < 0) {
360 perror("BLKTRACESTART");
d0ca268b
JA
361 return 1;
362 }
363
e7c9f3ff 364 memcpy(dip->buts_name, buts.name, sizeof(dip->buts_name));
99c1f5ab 365 dip_set_tracing(dip, 1);
d0ca268b
JA
366 return 0;
367}
368
e7c9f3ff 369static void stop_trace(struct device_information *dip)
d0ca268b 370{
99c1f5ab
JA
371 if (dip_tracing(dip) || kill_running_trace) {
372 dip_set_tracing(dip, 0);
cf9208ea 373
7035d92d
JA
374 /*
375 * should be stopped, just don't complain if it isn't
376 */
377 ioctl(dip->fd, BLKTRACESTOP);
378
ed71a31e
JA
379 if (ioctl(dip->fd, BLKTRACETEARDOWN) < 0)
380 perror("BLKTRACETEARDOWN");
cf9208ea 381
e7c9f3ff 382 close(dip->fd);
cf9208ea 383 dip->fd = -1;
707b0914 384 }
d0ca268b
JA
385}
386
e7c9f3ff
NS
387static void stop_all_traces(void)
388{
389 struct device_information *dip;
390 int i;
391
eb3c8108
JA
392 for_each_dip(dip, i) {
393 dip->drop_count = get_dropped_count(dip->buts_name);
e7c9f3ff 394 stop_trace(dip);
eb3c8108 395 }
e7c9f3ff
NS
396}
397
eb3c8108
JA
398static void wait_for_data(struct thread_information *tip)
399{
400 struct pollfd pfd = { .fd = tip->fd, .events = POLLIN };
401
9db17354 402 do {
b4aabcb3 403 poll(&pfd, 1, 100);
9db17354
JA
404 if (pfd.revents & POLLIN)
405 break;
406 if (tip->ofile_stdout)
407 break;
408 } while (!is_done());
eb3c8108
JA
409}
410
0cc7d25e
JA
411static int read_data_file(struct thread_information *tip, void *buf,
412 unsigned int len)
d0ca268b 413{
ae9f71b3 414 int ret = 0;
bbabf03a 415
9db17354
JA
416 do {
417 wait_for_data(tip);
ae9f71b3 418
9db17354
JA
419 ret = read(tip->fd, buf, len);
420 if (!ret)
421 continue;
422 else if (ret > 0)
423 return ret;
424 else {
bbabf03a 425 if (errno != EAGAIN) {
a3e4d330
JA
426 perror(tip->fn);
427 fprintf(stderr,"Thread %d failed read of %s\n",
428 tip->cpu, tip->fn);
429 break;
430 }
9db17354 431 continue;
bbabf03a 432 }
9db17354 433 } while (!is_done());
8a43bac5 434
bbabf03a 435 return ret;
8e86c98a 436
8a43bac5
JA
437}
438
0cc7d25e
JA
439static int read_data_net(struct thread_information *tip, void *buf,
440 unsigned int len)
8e86c98a
JA
441{
442 unsigned int bytes_left = len;
443 int ret = 0;
444
445 do {
446 ret = recv(net_in_fd, buf, bytes_left, MSG_WAITALL);
447
448 if (!ret)
449 continue;
450 else if (ret < 0) {
451 if (errno != EAGAIN) {
452 perror(tip->fn);
453 fprintf(stderr, "server: failed read\n");
454 return 0;
455 }
456 continue;
457 } else {
458 buf += ret;
459 bytes_left -= ret;
460 }
461 } while (!is_done() && bytes_left);
462
410d7c62 463 return len - bytes_left;
8e86c98a
JA
464}
465
0cc7d25e
JA
466static int read_data(struct thread_information *tip, void *buf,
467 unsigned int len)
8e86c98a 468{
663962f7 469 return tip->read_data(tip, buf, len);
8e86c98a
JA
470}
471
472static inline struct tip_subbuf *
473subbuf_fifo_dequeue(struct thread_information *tip)
a3e4d330 474{
21f55651
JA
475 const int head = tip->fifo.head;
476 const int next = (head + 1) & (FIFO_SIZE - 1);
477
478 if (head != tip->fifo.tail) {
479 struct tip_subbuf *ts = tip->fifo.q[head];
480
481 store_barrier();
482 tip->fifo.head = next;
483 return ts;
484 }
485
486 return NULL;
9db17354 487}
eb3c8108 488
21f55651
JA
489static inline int subbuf_fifo_queue(struct thread_information *tip,
490 struct tip_subbuf *ts)
9db17354 491{
21f55651
JA
492 const int tail = tip->fifo.tail;
493 const int next = (tail + 1) & (FIFO_SIZE - 1);
494
495 if (next != tip->fifo.head) {
496 tip->fifo.q[tail] = ts;
497 store_barrier();
498 tip->fifo.tail = next;
499 return 0;
500 }
501
502 fprintf(stderr, "fifo too small!\n");
503 return 1;
a3e4d330
JA
504}
505
b7106311
JA
506/*
507 * For file output, truncate and mmap the file appropriately
508 */
8e86c98a 509static int mmap_subbuf(struct thread_information *tip, unsigned int maxlen)
b7106311
JA
510{
511 int ofd = fileno(tip->ofile);
512 int ret;
513
514 /*
515 * extend file, if we have to. use chunks of 16 subbuffers.
516 */
517 if (tip->fs_off + buf_size > tip->fs_buf_len) {
518 if (tip->fs_buf) {
5975d309 519 munlock(tip->fs_buf, tip->fs_buf_len);
b7106311
JA
520 munmap(tip->fs_buf, tip->fs_buf_len);
521 tip->fs_buf = NULL;
522 }
523
524 tip->fs_off = tip->fs_size & (page_size - 1);
525 tip->fs_buf_len = (16 * buf_size) - tip->fs_off;
526 tip->fs_max_size += tip->fs_buf_len;
527
528 if (ftruncate(ofd, tip->fs_max_size) < 0) {
529 perror("ftruncate");
530 return -1;
531 }
532
533 tip->fs_buf = mmap(NULL, tip->fs_buf_len, PROT_WRITE,
534 MAP_SHARED, ofd, tip->fs_size - tip->fs_off);
535 if (tip->fs_buf == MAP_FAILED) {
536 perror("mmap");
537 return -1;
538 }
5975d309 539 mlock(tip->fs_buf, tip->fs_buf_len);
b7106311
JA
540 }
541
8e86c98a 542 ret = read_data(tip, tip->fs_buf + tip->fs_off, maxlen);
b7106311 543 if (ret >= 0) {
dbfbd6db 544 tip->data_read += ret;
b7106311
JA
545 tip->fs_size += ret;
546 tip->fs_off += ret;
547 return 0;
548 }
549
550 return -1;
551}
552
18eed2a7
JA
553/*
554 * Use the copy approach for pipes and network
555 */
556static int get_subbuf(struct thread_information *tip, unsigned int maxlen)
557{
558 struct tip_subbuf *ts = malloc(sizeof(*ts));
559 int ret;
560
561 ts->buf = malloc(buf_size);
562 ts->max_len = maxlen;
563
564 ret = read_data(tip, ts->buf, ts->max_len);
565 if (ret > 0) {
566 ts->len = ret;
dbfbd6db 567 tip->data_read += ret;
7035d92d
JA
568 if (subbuf_fifo_queue(tip, ts))
569 return -1;
18eed2a7
JA
570 }
571
572 return ret;
573}
574
32f18c48
JA
575static int get_subbuf_sendfile(struct thread_information *tip,
576 unsigned int maxlen)
577{
11eedd9b 578 struct tip_subbuf *ts;
bcbeb60f
TZ
579 struct stat sb;
580 unsigned int ready;
32f18c48 581
18eed2a7
JA
582 wait_for_data(tip);
583
584 /*
585 * hack to get last data out, we can't use sendfile for that
586 */
587 if (is_done())
588 return get_subbuf(tip, maxlen);
589
11629347 590 if (tip->sendfile_pending) {
663962f7 591 usleep(100);
11eedd9b 592 return 0;
663962f7 593 }
11eedd9b 594
bcbeb60f
TZ
595 if (fstat(tip->fd, &sb) < 0) {
596 perror("trace stat");
597 return -1;
598 }
599 ready = sb.st_size - tip->data_queued;
600 if (!ready)
601 return 0;
602
11629347
JA
603 ts = malloc(sizeof(*ts));
604 ts->buf = NULL;
605 ts->max_len = 0;
bcbeb60f
TZ
606 ts->len = ready;
607 tip->data_queued += ready;
1be42f3d 608
11629347
JA
609 if (subbuf_fifo_queue(tip, ts))
610 return -1;
11eedd9b 611
11629347 612 tip->sendfile_pending++;
bcbeb60f 613 return ready;
32f18c48
JA
614}
615
9db17354 616static void close_thread(struct thread_information *tip)
a3e4d330 617{
9db17354
JA
618 if (tip->fd != -1)
619 close(tip->fd);
620 if (tip->ofile)
621 fclose(tip->ofile);
622 if (tip->ofile_buffer)
623 free(tip->ofile_buffer);
624 if (tip->fd_buf)
625 free(tip->fd_buf);
1c99bc21 626
9db17354
JA
627 tip->fd = -1;
628 tip->ofile = NULL;
629 tip->ofile_buffer = NULL;
630 tip->fd_buf = NULL;
a3e4d330
JA
631}
632
8e86c98a
JA
633static void tip_ftrunc_final(struct thread_information *tip)
634{
635 /*
636 * truncate to right size and cleanup mmap
637 */
638 if (tip->ofile_mmap) {
639 int ofd = fileno(tip->ofile);
640
641 if (tip->fs_buf)
642 munmap(tip->fs_buf, tip->fs_buf_len);
643
644 ftruncate(ofd, tip->fs_size);
645 }
646}
647
9db17354 648static void *thread_main(void *arg)
a3e4d330 649{
9db17354
JA
650 struct thread_information *tip = arg;
651 pid_t pid = getpid();
652 cpu_set_t cpu_mask;
a3e4d330 653
9db17354
JA
654 CPU_ZERO(&cpu_mask);
655 CPU_SET((tip->cpu), &cpu_mask);
a3e4d330 656
9db17354
JA
657 if (sched_setaffinity(pid, sizeof(cpu_mask), &cpu_mask) == -1) {
658 perror("sched_setaffinity");
659 exit_trace(1);
660 }
a3e4d330 661
9db17354
JA
662 snprintf(tip->fn, sizeof(tip->fn), "%s/block/%s/trace%d",
663 relay_path, tip->device->buts_name, tip->cpu);
664 tip->fd = open(tip->fn, O_RDONLY);
665 if (tip->fd < 0) {
666 perror(tip->fn);
667 fprintf(stderr,"Thread %d failed open of %s\n", tip->cpu,
668 tip->fn);
669 exit_trace(1);
a3e4d330
JA
670 }
671
b7106311 672 while (!is_done()) {
7035d92d 673 if (tip->get_subbuf(tip, buf_size) < 0)
0cc7d25e 674 break;
b7106311
JA
675 }
676
7035d92d
JA
677 /*
678 * trace is stopped, pull data until we get a short read
679 */
680 while (tip->get_subbuf(tip, buf_size) > 0)
681 ;
682
8e86c98a
JA
683 tip_ftrunc_final(tip);
684 tip->exited = 1;
685 return NULL;
686}
b7106311 687
8e86c98a
JA
/*
 * Send all 'buf_len' bytes of 'buf' on socket 'fd', continuing after
 * partial sends.
 *
 * Returns 0 once everything has been sent, 1 on a send error.
 */
static int write_data_net(int fd, void *buf, unsigned int buf_len)
{
	const char *src = buf;
	unsigned int bytes_left = buf_len;

	while (bytes_left) {
		ssize_t ret = send(fd, src, bytes_left, 0);

		if (ret < 0) {
			/* interrupted by a signal before any data went out */
			if (errno == EINTR)
				continue;

			perror("send");
			return 1;
		}

		src += ret;
		bytes_left -= ret;
	}

	return 0;
}
706
32f18c48 707static int net_send_header(struct thread_information *tip, unsigned int len)
8e86c98a
JA
708{
709 struct blktrace_net_hdr hdr;
8e86c98a
JA
710
711 hdr.magic = BLK_IO_TRACE_MAGIC;
22cd0c02 712 strcpy(hdr.buts_name, tip->device->buts_name);
8e86c98a 713 hdr.cpu = tip->cpu;
22cd0c02 714 hdr.max_cpus = ncpus;
32f18c48 715 hdr.len = len;
8e86c98a 716
32f18c48
JA
717 return write_data_net(net_out_fd, &hdr, sizeof(hdr));
718}
8e86c98a 719
6a752c90
JA
720/*
721 * send header with 0 length to signal end-of-run
722 */
723static void net_client_send_close(void)
724{
725 struct blktrace_net_hdr hdr;
726
727 hdr.magic = BLK_IO_TRACE_MAGIC;
728 hdr.cpu = 0;
729 hdr.max_cpus = ncpus;
730 hdr.len = 0;
731
732 write_data_net(net_out_fd, &hdr, sizeof(hdr));
733}
734
32f18c48
JA
735static int flush_subbuf_net(struct thread_information *tip,
736 struct tip_subbuf *ts)
737{
738 if (net_send_header(tip, ts->len))
739 return 1;
22cd0c02
JA
740 if (write_data_net(net_out_fd, ts->buf, ts->len))
741 return 1;
8e86c98a 742
f0597a7e 743 free(ts->buf);
8e86c98a
JA
744 free(ts);
745 return 0;
746}
747
f6fead25
JA
748static int net_sendfile(struct thread_information *tip, struct tip_subbuf *ts)
749{
11629347
JA
750 int ret = sendfile(net_out_fd, tip->fd, NULL, ts->len);
751
752 if (ret < 0) {
753 perror("sendfile");
754 return 1;
755 } else if (ret < (int) ts->len) {
756 fprintf(stderr, "short sendfile send (%d of %d)\n", ret, ts->len);
757 return 1;
758 }
759
760 return 0;
761}
762
32f18c48
JA
763static int flush_subbuf_sendfile(struct thread_information *tip,
764 struct tip_subbuf *ts)
765{
bcbeb60f 766 int ret = 1;
18eed2a7
JA
767
768 /*
769 * currently we cannot use sendfile() on the last bytes read, as they
770 * may not be a full subbuffer. get_subbuf_sendfile() falls back to
771 * the read approach for those, so use send() to ship them out
772 */
773 if (ts->buf)
774 return flush_subbuf_net(tip, ts);
11eedd9b 775
f6fead25 776 if (net_send_header(tip, ts->len))
11629347 777 goto err;
f6fead25 778 if (net_sendfile(tip, ts))
11629347 779 goto err;
32f18c48 780
f6fead25 781 tip->data_read += ts->len;
e076d33b
JA
782 tip->ofile_offset += buf_size;
783 ret = 0;
11629347
JA
784err:
785 tip->sendfile_pending--;
32f18c48 786 free(ts);
11629347 787 return ret;
32f18c48
JA
788}
789
8e86c98a
JA
790static int write_data(struct thread_information *tip, void *buf,
791 unsigned int buf_len)
8a43bac5 792{
7126171a 793 int ret;
8a43bac5 794
6480258a
JA
795 if (!buf_len)
796 return 0;
797
7126171a
JA
798 while (1) {
799 ret = fwrite(buf, buf_len, 1, tip->ofile);
007c233c 800 if (ret == 1)
8a43bac5
JA
801 break;
802
db6fe5bc
JA
803 if (ret < 0) {
804 perror("write");
805 return 1;
8a43bac5 806 }
d0ca268b
JA
807 }
808
9db17354 809 if (tip->ofile_stdout)
7126171a
JA
810 fflush(tip->ofile);
811
8a43bac5
JA
812 return 0;
813}
814
8e86c98a
JA
815static int flush_subbuf_file(struct thread_information *tip,
816 struct tip_subbuf *ts)
8a43bac5 817{
9db17354
JA
818 unsigned int offset = 0;
819 struct blk_io_trace *t;
820 int pdu_len, events = 0;
8a43bac5 821
9db17354 822 /*
7de86b12 823 * surplus from last run
9db17354 824 */
7de86b12
AB
825 if (tip->leftover_ts) {
826 struct tip_subbuf *prev_ts = tip->leftover_ts;
827
9e8b753c 828 if (prev_ts->len + ts->len > prev_ts->max_len) {
7de86b12
AB
829 prev_ts->max_len += ts->len;
830 prev_ts->buf = realloc(prev_ts->buf, prev_ts->max_len);
831 }
832
9e8b753c 833 memcpy(prev_ts->buf + prev_ts->len, ts->buf, ts->len);
7de86b12
AB
834 prev_ts->len += ts->len;
835
836 free(ts->buf);
837 free(ts);
838
839 ts = prev_ts;
840 tip->leftover_ts = NULL;
9db17354 841 }
d0ca268b 842
9db17354
JA
843 while (offset + sizeof(*t) <= ts->len) {
844 t = ts->buf + offset;
3a9d6c13 845
9cfa6c2b
AB
846 if (verify_trace(t)) {
847 write_data(tip, ts->buf, offset);
9db17354 848 return -1;
9cfa6c2b 849 }
3a9d6c13 850
9db17354 851 pdu_len = t->pdu_len;
3a9d6c13 852
9db17354 853 if (offset + sizeof(*t) + pdu_len > ts->len)
3a9d6c13 854 break;
4b5db44a 855
9db17354
JA
856 offset += sizeof(*t) + pdu_len;
857 tip->events_processed++;
b7106311 858 tip->data_read += sizeof(*t) + pdu_len;
9db17354 859 events++;
3a9d6c13
JA
860 }
861
9cfa6c2b
AB
862 if (write_data(tip, ts->buf, offset))
863 return -1;
864
3a9d6c13 865 /*
9db17354 866 * leftover bytes, save them for next time
3a9d6c13 867 */
9db17354 868 if (offset != ts->len) {
7de86b12 869 tip->leftover_ts = ts;
9e8b753c
JA
870 ts->len -= offset;
871 memmove(ts->buf, ts->buf + offset, ts->len);
7de86b12
AB
872 } else {
873 free(ts->buf);
874 free(ts);
9db17354 875 }
4b5db44a 876
9db17354 877 return events;
4b5db44a
JA
878}
879
9db17354 880static int write_tip_events(struct thread_information *tip)
d5396421 881{
21f55651 882 struct tip_subbuf *ts = subbuf_fifo_dequeue(tip);
d5396421 883
0cc7d25e
JA
884 if (ts)
885 return tip->flush_subbuf(tip, ts);
91816d54 886
9db17354 887 return 0;
91816d54
JA
888}
889
9db17354
JA
890/*
891 * scans the tips we know and writes out the subbuffers we accumulate
892 */
893static void get_and_write_events(void)
d0ca268b 894{
9db17354
JA
895 struct device_information *dip;
896 struct thread_information *tip;
27223f19 897 int i, j, events, ret, tips_running;
d0ca268b 898
9db17354
JA
899 while (!is_done()) {
900 events = 0;
d0ca268b 901
9db17354
JA
902 for_each_dip(dip, i) {
903 for_each_tip(dip, tip, j) {
904 ret = write_tip_events(tip);
905 if (ret > 0)
906 events += ret;
907 }
908 }
d0ca268b 909
9db17354
JA
910 if (!events)
911 usleep(10);
d0ca268b
JA
912 }
913
a3e4d330 914 /*
9db17354 915 * reap stored events
a3e4d330 916 */
9db17354
JA
917 do {
918 events = 0;
27223f19 919 tips_running = 0;
9db17354
JA
920 for_each_dip(dip, i) {
921 for_each_tip(dip, tip, j) {
922 ret = write_tip_events(tip);
923 if (ret > 0)
924 events += ret;
27223f19 925 tips_running += !tip->exited;
9db17354 926 }
69e65a9e 927 }
9db17354 928 usleep(10);
27223f19 929 } while (events || tips_running);
d0ca268b
JA
930}
931
b7106311
JA
932static void wait_for_threads(void)
933{
934 /*
8e86c98a
JA
935 * for piped or network output, poll and fetch data for writeout.
936 * for files, we just wait around for trace threads to exit
b7106311 937 */
8e86c98a
JA
938 if ((output_name && !strcmp(output_name, "-")) ||
939 net_mode == Net_client)
b7106311
JA
940 get_and_write_events();
941 else {
942 struct device_information *dip;
943 struct thread_information *tip;
944 int i, j, tips_running;
945
946 do {
947 tips_running = 0;
948 usleep(1000);
949
950 for_each_dip(dip, i)
951 for_each_tip(dip, tip, j)
952 tips_running += !tip->exited;
953 } while (tips_running);
954 }
6a752c90
JA
955
956 if (net_mode == Net_client)
957 net_client_send_close();
b7106311
JA
958}
959
e3bf54d8
JA
960static int fill_ofname(struct thread_information *tip, char *dst,
961 char *buts_name)
8e86c98a 962{
e3bf54d8 963 struct stat sb;
8e86c98a 964 int len = 0;
e3bf54d8 965 time_t t;
8e86c98a
JA
966
967 if (output_dir)
968 len = sprintf(dst, "%s/", output_dir);
969
e3bf54d8
JA
970 if (net_mode == Net_server) {
971 len += sprintf(dst + len, "%s-", inet_ntoa(tip->cl_in_addr));
972 time(&t);
973 len += strftime(dst + len, 64, "%F-%T/", gmtime(&t));
974 }
975
976 if (stat(dst, &sb) < 0) {
977 if (errno != ENOENT) {
978 perror("stat");
979 return 1;
980 }
981 if (mkdir(dst, 0755) < 0) {
982 perror(dst);
983 fprintf(stderr, "Can't make output dir\n");
984 return 1;
985 }
986 }
987
8e86c98a 988 if (output_name)
e3bf54d8 989 sprintf(dst + len, "%s.blktrace.%d", output_name, tip->cpu);
8e86c98a 990 else
e3bf54d8
JA
991 sprintf(dst + len, "%s.blktrace.%d", buts_name, tip->cpu);
992
993 return 0;
8e86c98a
JA
994}
995
0cc7d25e
JA
996static void fill_ops(struct thread_information *tip)
997{
998 /*
999 * setup ops
1000 */
32f18c48 1001 if (net_mode == Net_client) {
36808255 1002 if (net_use_sendfile) {
32f18c48
JA
1003 tip->get_subbuf = get_subbuf_sendfile;
1004 tip->flush_subbuf = flush_subbuf_sendfile;
1005 } else {
1006 tip->get_subbuf = get_subbuf;
1007 tip->flush_subbuf = flush_subbuf_net;
1008 }
1009 } else {
1010 if (tip->ofile_mmap)
1011 tip->get_subbuf = mmap_subbuf;
1012 else
1013 tip->get_subbuf = get_subbuf;
0cc7d25e 1014
0cc7d25e 1015 tip->flush_subbuf = flush_subbuf_file;
32f18c48
JA
1016 }
1017
0cc7d25e
JA
1018 if (net_mode == Net_server)
1019 tip->read_data = read_data_net;
1020 else
1021 tip->read_data = read_data_file;
1022}
1023
ddf22842
JA
1024static int tip_open_output(struct device_information *dip,
1025 struct thread_information *tip)
d0ca268b 1026{
ddf22842 1027 int pipeline = output_name && !strcmp(output_name, "-");
8e86c98a 1028 int mode, vbuf_size;
e3bf54d8 1029 char op[128];
d0ca268b 1030
ddf22842
JA
1031 if (net_mode == Net_client) {
1032 tip->ofile = NULL;
1033 tip->ofile_stdout = 0;
1034 tip->ofile_mmap = 0;
0c0b75b4 1035 goto done;
ddf22842
JA
1036 } else if (pipeline) {
1037 tip->ofile = fdopen(STDOUT_FILENO, "w");
1038 tip->ofile_stdout = 1;
1039 tip->ofile_mmap = 0;
1040 mode = _IOLBF;
1041 vbuf_size = 512;
1042 } else {
e3bf54d8
JA
1043 if (fill_ofname(tip, op, dip->buts_name))
1044 return 1;
ddf22842
JA
1045 tip->ofile = fopen(op, "w+");
1046 tip->ofile_stdout = 0;
1047 tip->ofile_mmap = 1;
1048 mode = _IOFBF;
1049 vbuf_size = OFILE_BUF;
1050 }
d5396421 1051
0c0b75b4 1052 if (tip->ofile == NULL) {
ddf22842
JA
1053 perror(op);
1054 return 1;
1055 }
d5396421 1056
0c0b75b4
JA
1057 tip->ofile_buffer = malloc(vbuf_size);
1058 if (setvbuf(tip->ofile, tip->ofile_buffer, mode, vbuf_size)) {
1059 perror("setvbuf");
1060 close_thread(tip);
1061 return 1;
ddf22842
JA
1062 }
1063
0c0b75b4 1064done:
ddf22842
JA
1065 fill_ops(tip);
1066 return 0;
1067}
007c233c 1068
ddf22842
JA
1069static int start_threads(struct device_information *dip)
1070{
1071 struct thread_information *tip;
1072 int j;
1073
1074 for_each_tip(dip, tip, j) {
1075 tip->cpu = j;
1076 tip->device = dip;
1077 tip->events_processed = 0;
11eedd9b 1078 tip->fd = -1;
ddf22842
JA
1079 memset(&tip->fifo, 0, sizeof(tip->fifo));
1080 tip->leftover_ts = NULL;
1081
1082 if (tip_open_output(dip, tip))
1083 return 1;
0cc7d25e 1084
9db17354 1085 if (pthread_create(&tip->thread, NULL, thread_main, tip)) {
e7c9f3ff 1086 perror("pthread_create");
007c233c 1087 close_thread(tip);
e7c9f3ff 1088 return 1;
d0ca268b
JA
1089 }
1090 }
1091
e7c9f3ff 1092 return 0;
d0ca268b
JA
1093}
1094
e7c9f3ff 1095static void stop_threads(struct device_information *dip)
3aabcd89 1096{
e7c9f3ff 1097 struct thread_information *tip;
91816d54 1098 unsigned long ret;
007c233c
JA
1099 int i;
1100
9db17354 1101 for_each_tip(dip, tip, i) {
91816d54 1102 (void) pthread_join(tip->thread, (void *) &ret);
9db17354
JA
1103 close_thread(tip);
1104 }
3aabcd89
JA
1105}
1106
e7c9f3ff 1107static void stop_all_threads(void)
72ca8801 1108{
e7c9f3ff 1109 struct device_information *dip;
72ca8801
NS
1110 int i;
1111
99c1f5ab 1112 for_each_dip(dip, i)
e7c9f3ff
NS
1113 stop_threads(dip);
1114}
1115
1116static void stop_all_tracing(void)
1117{
1118 struct device_information *dip;
91816d54 1119 int i;
007c233c 1120
91816d54 1121 for_each_dip(dip, i)
e7c9f3ff 1122 stop_trace(dip);
72ca8801
NS
1123}
1124
1125static void exit_trace(int status)
1126{
eb3c8108
JA
1127 if (!is_trace_stopped()) {
1128 trace_stopped = 1;
1129 stop_all_threads();
1130 stop_all_tracing();
1131 }
1132
72ca8801
NS
1133 exit(status);
1134}
1135
e7c9f3ff
NS
1136static int resize_devices(char *path)
1137{
1138 int size = (ndevs + 1) * sizeof(struct device_information);
1139
1140 device_information = realloc(device_information, size);
1141 if (!device_information) {
1142 fprintf(stderr, "Out of memory, device %s (%d)\n", path, size);
1143 return 1;
1144 }
1145 device_information[ndevs].path = path;
1146 ndevs++;
1147 return 0;
1148}
1149
1150static int open_devices(void)
d0ca268b 1151{
e7c9f3ff 1152 struct device_information *dip;
d0ca268b 1153 int i;
d0ca268b 1154
99c1f5ab 1155 for_each_dip(dip, i) {
cf9208ea 1156 dip->fd = open(dip->path, O_RDONLY | O_NONBLOCK);
e7c9f3ff
NS
1157 if (dip->fd < 0) {
1158 perror(dip->path);
1159 return 1;
1160 }
1161 }
99c1f5ab 1162
e7c9f3ff
NS
1163 return 0;
1164}
1165
1166static int start_devices(void)
1167{
1168 struct device_information *dip;
1169 int i, j, size;
1170
1171 size = ncpus * sizeof(struct thread_information);
1172 thread_information = malloc(size * ndevs);
1173 if (!thread_information) {
1174 fprintf(stderr, "Out of memory, threads (%d)\n", size * ndevs);
1175 return 1;
1176 }
d5396421 1177
99c1f5ab 1178 for_each_dip(dip, i) {
e7c9f3ff
NS
1179 if (start_trace(dip)) {
1180 close(dip->fd);
1181 fprintf(stderr, "Failed to start trace on %s\n",
1182 dip->path);
1183 break;
1184 }
1185 }
99c1f5ab 1186
e7c9f3ff 1187 if (i != ndevs) {
99c1f5ab 1188 __for_each_dip(dip, j, i)
e7c9f3ff 1189 stop_trace(dip);
99c1f5ab 1190
e7c9f3ff
NS
1191 return 1;
1192 }
1193
99c1f5ab 1194 for_each_dip(dip, i) {
e7c9f3ff
NS
1195 dip->threads = thread_information + (i * ncpus);
1196 if (start_threads(dip)) {
1197 fprintf(stderr, "Failed to start worker threads\n");
1198 break;
1199 }
1200 }
99c1f5ab 1201
e7c9f3ff 1202 if (i != ndevs) {
99c1f5ab 1203 __for_each_dip(dip, j, i)
e7c9f3ff 1204 stop_threads(dip);
99c1f5ab 1205 for_each_dip(dip, i)
e7c9f3ff 1206 stop_trace(dip);
99c1f5ab 1207
e7c9f3ff 1208 return 1;
d0ca268b
JA
1209 }
1210
e7c9f3ff 1211 return 0;
d0ca268b
JA
1212}
1213
e7c9f3ff
NS
1214static void show_stats(void)
1215{
e7c9f3ff
NS
1216 struct device_information *dip;
1217 struct thread_information *tip;
b7106311 1218 unsigned long long events_processed, data_read;
eb3c8108 1219 unsigned long total_drops;
2f903295 1220 int i, j, no_stdout = 0;
eb3c8108
JA
1221
1222 if (is_stat_shown())
1223 return;
1224
2f903295
JA
1225 if (output_name && !strcmp(output_name, "-"))
1226 no_stdout = 1;
1227
eb3c8108 1228 stat_shown = 1;
428683db 1229
56070ea4 1230 total_drops = 0;
99c1f5ab 1231 for_each_dip(dip, i) {
2f903295 1232 if (!no_stdout)
56070ea4 1233 printf("Device: %s\n", dip->path);
e7c9f3ff 1234 events_processed = 0;
b7106311 1235 data_read = 0;
99c1f5ab 1236 for_each_tip(dip, tip, j) {
2f903295 1237 if (!no_stdout)
b7106311
JA
1238 printf(" CPU%3d: %20lu events, %8llu KiB data\n",
1239 tip->cpu, tip->events_processed,
54824c20 1240 (tip->data_read + 1023) >> 10);
e7c9f3ff 1241 events_processed += tip->events_processed;
b7106311 1242 data_read += tip->data_read;
e7c9f3ff 1243 }
eb3c8108 1244 total_drops += dip->drop_count;
2f903295 1245 if (!no_stdout)
b7106311
JA
1246 printf(" Total: %20llu events (dropped %lu), %8llu KiB data\n",
1247 events_processed, dip->drop_count,
18d8437d 1248 (data_read + 1023) >> 10);
e7c9f3ff 1249 }
56070ea4
JA
1250
1251 if (total_drops)
1252 fprintf(stderr, "You have dropped events, consider using a larger buffer size (-b)\n");
e7c9f3ff 1253}
52724a0e 1254
e3bf54d8
JA
1255static struct device_information *net_get_dip(char *buts_name,
1256 struct in_addr *cl_in_addr)
8e86c98a 1257{
22cd0c02 1258 struct device_information *dip;
8e86c98a
JA
1259 int i;
1260
22cd0c02
JA
1261 for (i = 0; i < ndevs; i++) {
1262 dip = &device_information[i];
8e86c98a 1263
22cd0c02
JA
1264 if (!strcmp(dip->buts_name, buts_name))
1265 return dip;
8e86c98a
JA
1266 }
1267
22cd0c02
JA
1268 device_information = realloc(device_information, (ndevs + 1) * sizeof(*dip));
1269 dip = &device_information[ndevs];
921b05fe
JA
1270 memset(dip, 0, sizeof(*dip));
1271 dip->fd = -1;
22cd0c02 1272 strcpy(dip->buts_name, buts_name);
921b05fe 1273 dip->path = strdup(buts_name);
22cd0c02
JA
1274 ndevs++;
1275 dip->threads = malloc(ncpus * sizeof(struct thread_information));
1276 memset(dip->threads, 0, ncpus * sizeof(struct thread_information));
1277
1278 /*
1279 * open all files
1280 */
1281 for (i = 0; i < ncpus; i++) {
1282 struct thread_information *tip = &dip->threads[i];
8e86c98a 1283
22cd0c02 1284 tip->cpu = i;
22cd0c02 1285 tip->device = dip;
1366e53a 1286 tip->fd = -1;
e3bf54d8 1287 tip->cl_in_addr = *cl_in_addr;
8e86c98a 1288
ddf22842 1289 if (tip_open_output(dip, tip))
22cd0c02 1290 return NULL;
8e86c98a
JA
1291 }
1292
22cd0c02
JA
1293 return dip;
1294}
1295
e3bf54d8
JA
1296static struct thread_information *net_get_tip(struct blktrace_net_hdr *bnh,
1297 struct in_addr *cl_in_addr)
22cd0c02
JA
1298{
1299 struct device_information *dip;
1300
1301 ncpus = bnh->max_cpus;
e3bf54d8 1302 dip = net_get_dip(bnh->buts_name, cl_in_addr);
22cd0c02 1303 return &dip->threads[bnh->cpu];
8e86c98a
JA
1304}
1305
1306static int net_get_header(struct blktrace_net_hdr *bnh)
1307{
1308 int fl = fcntl(net_in_fd, F_GETFL);
1309 int bytes_left, ret;
1310 void *p = bnh;
1311
1312 fcntl(net_in_fd, F_SETFL, fl | O_NONBLOCK);
1313 bytes_left = sizeof(*bnh);
1314 while (bytes_left && !is_done()) {
1315 ret = recv(net_in_fd, p, bytes_left, MSG_WAITALL);
1316 if (ret < 0) {
1317 if (errno != EAGAIN) {
1318 perror("recv header");
1319 return 1;
1320 }
1321 usleep(100);
1322 continue;
1323 } else if (!ret) {
1324 usleep(100);
1325 continue;
1326 } else {
1327 p += ret;
1328 bytes_left -= ret;
1329 }
1330 }
1331 fcntl(net_in_fd, F_SETFL, fl & ~O_NONBLOCK);
227f89ff 1332 return bytes_left;
8e86c98a
JA
1333}
1334
e3bf54d8 1335static int net_server_loop(struct in_addr *cl_in_addr)
8e86c98a
JA
1336{
1337 struct thread_information *tip;
1338 struct blktrace_net_hdr bnh;
1339
1340 if (net_get_header(&bnh))
1341 return 1;
1342
1343 if (data_is_native == -1 && check_data_endianness(bnh.magic)) {
1344 fprintf(stderr, "server: received data is bad\n");
1345 return 1;
1346 }
1347
1348 if (!data_is_native) {
227f89ff 1349 bnh.magic = be32_to_cpu(bnh.magic);
8e86c98a
JA
1350 bnh.cpu = be32_to_cpu(bnh.cpu);
1351 bnh.len = be32_to_cpu(bnh.len);
1352 }
1353
227f89ff
JA
1354 if ((bnh.magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) {
1355 fprintf(stderr, "server: bad data magic\n");
1356 return 1;
1357 }
1358
6a752c90
JA
1359 /*
1360 * len == 0 means that the other end signalled end-of-run
1361 */
1362 if (!bnh.len) {
1363 fprintf(stderr, "server: end of run\n");
1364 return 1;
1365 }
1366
e3bf54d8 1367 tip = net_get_tip(&bnh, cl_in_addr);
8e86c98a
JA
1368 if (!tip)
1369 return 1;
1370
1371 if (mmap_subbuf(tip, bnh.len))
1372 return 1;
1373
1374 return 0;
1375}
1376
659bcc3f
JA
1377static int get_connection(int fd, struct sockaddr_in *addr)
1378{
1379 struct pollfd pfd = { .fd = fd, .events = POLLIN };
1380 socklen_t socklen;
1381
1382 printf("blktrace: waiting for incoming connection...\n");
1383
1384 if (poll(&pfd, 1, -1) < 0) {
1385 perror("poll for connection");
1386 return 1;
1387 }
1388 if ((pfd.revents & POLLIN) == 0)
1389 return 1;
1390
1391 socklen = sizeof(*addr);
1392 net_in_fd = accept(fd, (struct sockaddr *) addr, &socklen);
1393 if (net_in_fd < 0) {
1394 perror("accept");
1395 return 1;
1396 }
1397
1398 printf("blktrace: connection from %s\n", inet_ntoa(addr->sin_addr));
1399 return 0;
1400}
1401
8e86c98a
JA
1402/*
1403 * Start here when we are in server mode - just fetch data from the network
1404 * and dump to files
1405 */
1406static int net_server(void)
1407{
898bbd3b
JA
1408 struct device_information *dip;
1409 struct thread_information *tip;
8e86c98a 1410 struct sockaddr_in addr;
22cd0c02 1411 int fd, opt, i, j;
8e86c98a
JA
1412
1413 fd = socket(AF_INET, SOCK_STREAM, 0);
1414 if (fd < 0) {
1415 perror("server: socket");
1416 return 1;
1417 }
1418
1419 opt = 1;
1420 if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) < 0) {
1421 perror("setsockopt");
1422 return 1;
1423 }
1424
1425 memset(&addr, 0, sizeof(addr));
1426 addr.sin_family = AF_INET;
1427 addr.sin_addr.s_addr = htonl(INADDR_ANY);
1428 addr.sin_port = htons(net_port);
1429
1430 if (bind(fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
1431 perror("bind");
1432 return 1;
1433 }
1434
1435 if (listen(fd, 1) < 0) {
1436 perror("listen");
1437 return 1;
1438 }
1439
6a752c90 1440repeat:
659bcc3f
JA
1441 if (get_connection(fd, &addr))
1442 return 0;
8e86c98a
JA
1443
1444 while (!is_done()) {
e3bf54d8 1445 if (net_server_loop(&addr.sin_addr))
8e86c98a
JA
1446 break;
1447 }
1448
898bbd3b
JA
1449 for_each_dip(dip, i)
1450 for_each_tip(dip, tip, j)
1451 tip_ftrunc_final(tip);
8e86c98a 1452
410d7c62 1453 show_stats();
6a752c90
JA
1454
1455 if (is_done())
1456 return 0;
1457
898bbd3b
JA
1458 /*
1459 * cleanup for next run
1460 */
1461 for_each_dip(dip, i) {
1462 for_each_tip(dip, tip, j)
1463 fclose(tip->ofile);
1464
1465 free(dip->threads);
921b05fe 1466 free(dip->path);
898bbd3b
JA
1467 }
1468
1469 free(device_information);
1470 device_information = NULL;
1471 ncpus = ndevs = 0;
b46a0342
JA
1472
1473 close(net_in_fd);
1474 net_in_fd = -1;
dbfbd6db 1475 stat_shown = 0;
6a752c90 1476 goto repeat;
8e86c98a
JA
1477}
1478
1479/*
1480 * Setup outgoing network connection where we will transmit data
1481 */
1482static int net_setup_client(void)
1483{
1484 struct sockaddr_in addr;
1485 int fd;
1486
1487 fd = socket(AF_INET, SOCK_STREAM, 0);
1488 if (fd < 0) {
1489 perror("client: socket");
1490 return 1;
1491 }
1492
1493 memset(&addr, 0, sizeof(addr));
1494 addr.sin_family = AF_INET;
1495 addr.sin_port = htons(net_port);
1496
1497 if (inet_aton(hostname, &addr.sin_addr) != 1) {
1498 struct hostent *hent = gethostbyname(hostname);
1499 if (!hent) {
1500 perror("gethostbyname");
1501 return 1;
1502 }
1503
1504 memcpy(&addr.sin_addr, hent->h_addr, 4);
1505 strcpy(hostname, hent->h_name);
1506 }
1507
1508 printf("blktrace: connecting to %s\n", hostname);
1509
1510 if (connect(fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
1511 perror("client: connect");
1512 return 1;
1513 }
1514
1515 printf("blktrace: connected!\n");
1516 net_out_fd = fd;
1517 return 0;
1518}
1519
52724a0e
JA
/*
 * Option summary printed by show_usage(). Adjacent string literals are
 * concatenated by the compiler, one literal per output line.
 */
static char usage_str[] =
	"-d <dev> [ -r relay path ] [ -o <output> ] [-k ] [ -w time ]\n"
	"[ -a action ] [ -A action mask ] [ -v ]\n\n"
	"\t-d Use specified device. May also be given last after options\n"
	"\t-r Path to mounted relayfs, defaults to /relay\n"
	"\t-o File(s) to send output to\n"
	"\t-D Directory to prepend to output file names\n"
	"\t-k Kill a running trace\n"
	"\t-w Stop after defined time, in seconds\n"
	"\t-a Only trace specified actions. See documentation\n"
	"\t-A Give trace mask as a single value. See documentation\n"
	"\t-b Sub buffer size in KiB\n"
	"\t-n Number of sub buffers\n"
	"\t-l Run in network listen mode (blktrace server)\n"
	"\t-h Run in network client mode, connecting to the given host\n"
	"\t-p Network port to use (default 8462)\n"
	"\t-s Make the network client use sendfile() to transfer data\n"
	"\t-V Print program version info\n\n";
52724a0e 1538
ee1f4158
NS
1539static void show_usage(char *program)
1540{
52724a0e 1541 fprintf(stderr, "Usage: %s %s %s",program, blktrace_version, usage_str);
ee1f4158 1542}
d0ca268b
JA
1543
1544int main(int argc, char *argv[])
1545{
5270dddd 1546 static char default_relay_path[] = "/relay";
e3e74029 1547 struct statfs st;
d39c04ca 1548 int i, c;
ece238a6 1549 int stop_watch = 0;
d39c04ca
AB
1550 int act_mask_tmp = 0;
1551
1552 while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) >= 0) {
1553 switch (c) {
1554 case 'a':
1555 i = find_mask_map(optarg);
1556 if (i < 0) {
ab197ca7 1557 fprintf(stderr,"Invalid action mask %s\n",
d39c04ca 1558 optarg);
7425d456 1559 return 1;
d39c04ca
AB
1560 }
1561 act_mask_tmp |= i;
1562 break;
1563
1564 case 'A':
98f8386b
AB
1565 if ((sscanf(optarg, "%x", &i) != 1) ||
1566 !valid_act_opt(i)) {
d39c04ca 1567 fprintf(stderr,
ab197ca7 1568 "Invalid set action mask %s/0x%x\n",
d39c04ca 1569 optarg, i);
7425d456 1570 return 1;
d39c04ca
AB
1571 }
1572 act_mask_tmp = i;
1573 break;
d0ca268b 1574
d39c04ca 1575 case 'd':
e7c9f3ff
NS
1576 if (resize_devices(optarg) != 0)
1577 return 1;
d39c04ca
AB
1578 break;
1579
5270dddd
JA
1580 case 'r':
1581 relay_path = optarg;
1582 break;
1583
d5396421 1584 case 'o':
66efebf8 1585 output_name = optarg;
d5396421 1586 break;
bc39777c
JA
1587 case 'k':
1588 kill_running_trace = 1;
1589 break;
ece238a6
NS
1590 case 'w':
1591 stop_watch = atoi(optarg);
1592 if (stop_watch <= 0) {
1593 fprintf(stderr,
1594 "Invalid stopwatch value (%d secs)\n",
1595 stop_watch);
1596 return 1;
1597 }
1598 break;
57ea8602 1599 case 'V':
52724a0e
JA
1600 printf("%s version %s\n", argv[0], blktrace_version);
1601 return 0;
129aa440 1602 case 'b':
eb3c8108 1603 buf_size = strtoul(optarg, NULL, 10);
183a0855 1604 if (buf_size <= 0 || buf_size > 16*1024) {
129aa440 1605 fprintf(stderr,
eb3c8108 1606 "Invalid buffer size (%lu)\n",buf_size);
129aa440
JA
1607 return 1;
1608 }
1609 buf_size <<= 10;
1610 break;
1611 case 'n':
eb3c8108 1612 buf_nr = strtoul(optarg, NULL, 10);
129aa440
JA
1613 if (buf_nr <= 0) {
1614 fprintf(stderr,
eb3c8108 1615 "Invalid buffer nr (%lu)\n", buf_nr);
129aa440
JA
1616 return 1;
1617 }
1618 break;
d1d7f15f
JA
1619 case 'D':
1620 output_dir = optarg;
1621 break;
8e86c98a
JA
1622 case 'h':
1623 net_mode = Net_client;
1624 strcpy(hostname, optarg);
1625 break;
1626 case 'l':
1627 net_mode = Net_server;
1628 break;
1629 case 'p':
1630 net_port = atoi(optarg);
1631 break;
32f18c48 1632 case 's':
f6fead25 1633 net_use_sendfile = 1;
32f18c48 1634 break;
d39c04ca 1635 default:
ee1f4158 1636 show_usage(argv[0]);
7425d456 1637 return 1;
d39c04ca
AB
1638 }
1639 }
1640
8e86c98a
JA
1641 setlocale(LC_NUMERIC, "en_US");
1642
1643 page_size = getpagesize();
1644
1645 if (net_mode == Net_server)
1646 return net_server();
1647
22cd0c02
JA
1648 while (optind < argc) {
1649 if (resize_devices(argv[optind++]) != 0)
1650 return 1;
1651 }
1652
e7c9f3ff 1653 if (ndevs == 0) {
ee1f4158 1654 show_usage(argv[0]);
7425d456 1655 return 1;
d39c04ca
AB
1656 }
1657
5270dddd
JA
1658 if (!relay_path)
1659 relay_path = default_relay_path;
1660
d5396421 1661 if (act_mask_tmp != 0)
d39c04ca 1662 act_mask = act_mask_tmp;
d0ca268b 1663
e3e74029
NS
1664 if (statfs(relay_path, &st) < 0) {
1665 perror("statfs");
1666 fprintf(stderr,"%s does not appear to be a valid path\n",
1667 relay_path);
1668 return 1;
64acacae 1669 } else if (st.f_type != (long) RELAYFS_TYPE) {
e3e74029 1670 fprintf(stderr,"%s does not appear to be a relay filesystem\n",
d0ca268b 1671 relay_path);
7425d456 1672 return 1;
d0ca268b
JA
1673 }
1674
e7c9f3ff 1675 if (open_devices() != 0)
7425d456 1676 return 1;
bc39777c
JA
1677
1678 if (kill_running_trace) {
e7c9f3ff 1679 stop_all_traces();
7425d456 1680 return 0;
bc39777c
JA
1681 }
1682
e7c9f3ff
NS
1683 ncpus = sysconf(_SC_NPROCESSORS_ONLN);
1684 if (ncpus < 0) {
1685 fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed\n");
7425d456 1686 return 1;
d0ca268b
JA
1687 }
1688
d0ca268b
JA
1689 signal(SIGINT, handle_sigint);
1690 signal(SIGHUP, handle_sigint);
1691 signal(SIGTERM, handle_sigint);
ece238a6 1692 signal(SIGALRM, handle_sigint);
d0ca268b 1693
8e86c98a
JA
1694 if (net_mode == Net_client && net_setup_client())
1695 return 1;
1696
1697 if (start_devices() != 0)
1698 return 1;
1699
e7c9f3ff 1700 atexit(stop_all_tracing);
830fd65c 1701
ece238a6
NS
1702 if (stop_watch)
1703 alarm(stop_watch);
1704
b7106311 1705 wait_for_threads();
d0ca268b 1706
eb3c8108
JA
1707 if (!is_trace_stopped()) {
1708 trace_stopped = 1;
91816d54
JA
1709 stop_all_threads();
1710 stop_all_traces();
91816d54 1711 }
d0ca268b 1712
eb3c8108
JA
1713 show_stats();
1714
d0ca268b
JA
1715 return 0;
1716}
1717