[PATCH] blktrace: no need to track ts->offset anymore
[blktrace.git] / blktrace.c
CommitLineData
d0ca268b
JA
1/*
2 * block queue tracing application
3 *
d956a2cd
JA
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *
d0ca268b
JA
20 */
21#include <pthread.h>
22#include <sys/types.h>
23#include <sys/stat.h>
24#include <unistd.h>
25#include <locale.h>
26#include <signal.h>
27#include <fcntl.h>
28#include <string.h>
29#include <sys/ioctl.h>
b9d4294e 30#include <sys/param.h>
e3e74029 31#include <sys/statfs.h>
eb3c8108 32#include <sys/poll.h>
b7106311 33#include <sys/mman.h>
8e86c98a 34#include <sys/socket.h>
d0ca268b
JA
35#include <stdio.h>
36#include <stdlib.h>
37#include <sched.h>
d39c04ca
AB
38#include <ctype.h>
39#include <getopt.h>
da39451f 40#include <errno.h>
8e86c98a
JA
41#include <netinet/in.h>
42#include <arpa/inet.h>
43#include <netdb.h>
32f18c48 44#include <sys/sendfile.h>
d0ca268b
JA
45
46#include "blktrace.h"
21f55651 47#include "barrier.h"
d0ca268b 48
13d928f0 49static char blktrace_version[] = "0.99";
52724a0e 50
8f551a39
JA
51/*
52 * You may want to increase this even more, if you are logging at a high
53 * rate and see skipped/missed events
54 */
007c233c 55#define BUF_SIZE (512 * 1024)
d0ca268b
JA
56#define BUF_NR (4)
57
007c233c
JA
58#define OFILE_BUF (128 * 1024)
59
e3e74029
NS
60#define RELAYFS_TYPE 0xF0B4A981
61
32f18c48 62#define S_OPTS "d:a:A:r:o:kw:Vb:n:D:lh:p:s"
/*
 * Long-option table. Every entry carries the short option character it
 * corresponds to in S_OPTS. Fields are listed positionally in struct option
 * order: name, has_arg, flag, val. The table ends with an all-zero entry.
 */
static struct option l_opts[] = {
	{ "dev",		required_argument,	NULL,	'd' },
	{ "act-mask",		required_argument,	NULL,	'a' },
	{ "set-mask",		required_argument,	NULL,	'A' },
	{ "relay",		required_argument,	NULL,	'r' },
	{ "output",		required_argument,	NULL,	'o' },
	{ "kill",		no_argument,		NULL,	'k' },
	{ "stopwatch",		required_argument,	NULL,	'w' },
	{ "version",		no_argument,		NULL,	'V' },
	{ "buffer-size",	required_argument,	NULL,	'b' },
	{ "num-sub-buffers",	required_argument,	NULL,	'n' },
	{ "output-dir",		required_argument,	NULL,	'D' },
	{ "listen",		no_argument,		NULL,	'l' },
	{ "host",		required_argument,	NULL,	'h' },
	{ "port",		required_argument,	NULL,	'p' },
	{ "sendfile",		no_argument,		NULL,	's' },
	{ NULL,			0,			NULL,	0 }
};
158
/*
 * One unit of trace data handed from a reader thread to the writer.
 */
struct tip_subbuf {
	void *buf;		/* data; NULL for sendfile placeholders */
	unsigned int len;	/* number of valid bytes in buf */
	unsigned int max_len;	/* allocated size of buf */
};
164
21f55651
JA
#define FIFO_SIZE	(1024)	/* should be plenty big! */
#define CL_SIZE		(128)	/* cache line, any bigger? */

/*
 * Ring of subbuffer pointers. subbuf_fifo_queue() advances tail and
 * subbuf_fifo_dequeue() advances head; both indices are aligned to CL_SIZE
 * so they do not share a cache line. Indices wrap with a mask, so
 * FIFO_SIZE must stay a power of two.
 */
struct tip_subbuf_fifo {
	int tail __attribute__((aligned(CL_SIZE)));
	int head __attribute__((aligned(CL_SIZE)));
	struct tip_subbuf *q[FIFO_SIZE];
};
173
d0ca268b
JA
174struct thread_information {
175 int cpu;
176 pthread_t thread;
b9d4294e
JA
177
178 int fd;
a3e4d330 179 void *fd_buf;
b9d4294e
JA
180 char fn[MAXPATHLEN + 64];
181
11eedd9b
JA
182 int pfd;
183 size_t *pfd_buf;
184
e3bf54d8
JA
185 struct in_addr cl_in_addr;
186
007c233c
JA
187 FILE *ofile;
188 char *ofile_buffer;
32f18c48 189 off_t ofile_offset;
9db17354 190 int ofile_stdout;
8e86c98a 191 int ofile_mmap;
11629347 192 volatile int sendfile_pending;
007c233c 193
0cc7d25e
JA
194 int (*get_subbuf)(struct thread_information *, unsigned int);
195 int (*flush_subbuf)(struct thread_information *, struct tip_subbuf *);
196 int (*read_data)(struct thread_information *, void *, unsigned int);
197
d0ca268b 198 unsigned long events_processed;
b7106311 199 unsigned long long data_read;
e7c9f3ff 200 struct device_information *device;
9db17354
JA
201
202 int exited;
203
b7106311
JA
204 /*
205 * piped fifo buffers
206 */
21f55651 207 struct tip_subbuf_fifo fifo;
7de86b12 208 struct tip_subbuf *leftover_ts;
b7106311
JA
209
210 /*
211 * mmap controlled output files
212 */
213 unsigned long long fs_size;
214 unsigned long long fs_max_size;
215 unsigned long fs_off;
216 void *fs_buf;
217 unsigned long fs_buf_len;
d0ca268b
JA
218};
219
e7c9f3ff
NS
/*
 * State kept for every traced block device.
 */
struct device_information {
	int fd;					/* open device, -1 once stopped */
	char *path;				/* device path from the command line */
	char buts_name[32];			/* trace name returned by BLKTRACESETUP */
	volatile int trace_started;		/* see dip_tracing()/dip_set_tracing() */
	unsigned long drop_count;		/* dropped events, filled when stopping */
	struct thread_information *threads;	/* ncpus reader threads */
};
d0ca268b 228
e7c9f3ff 229static int ncpus;
d0ca268b 230static struct thread_information *thread_information;
e7c9f3ff
NS
231static int ndevs;
232static struct device_information *device_information;
233
234/* command line option globals */
235static char *relay_path;
d5396421 236static char *output_name;
d1d7f15f 237static char *output_dir;
5c86134e 238static int act_mask = ~0U;
bc39777c 239static int kill_running_trace;
eb3c8108
JA
240static unsigned long buf_size = BUF_SIZE;
241static unsigned long buf_nr = BUF_NR;
b7106311 242static unsigned int page_size;
d39c04ca 243
e7c9f3ff
NS
244#define is_done() (*(volatile int *)(&done))
245static volatile int done;
246
eb3c8108
JA
247#define is_trace_stopped() (*(volatile int *)(&trace_stopped))
248static volatile int trace_stopped;
249
250#define is_stat_shown() (*(volatile int *)(&stat_shown))
251static volatile int stat_shown;
a3e4d330 252
8e86c98a
JA
253int data_is_native = -1;
254
72ca8801
NS
255static void exit_trace(int status);
256
99c1f5ab
JA
/* read / update a device's trace_started flag */
#define dip_tracing(dip)	(*(volatile int *)(&(dip)->trace_started))
#define dip_set_tracing(dip, v)	((dip)->trace_started = (v))

/* walk the first __e entries of device_information */
#define __for_each_dip(__d, __i, __e)	\
	for (__i = 0, __d = device_information; __i < __e; __i++, __d++)

/* walk every device */
#define for_each_dip(__d, __i)	__for_each_dip(__d, __i, ndevs)
/* walk the ncpus threads of device __d */
#define for_each_tip(__d, __t, __j)	\
	for (__j = 0, __t = (__d)->threads; __j < ncpus; __j++, __t++)
99c1f5ab 266
8e86c98a
JA
267/*
268 * networking stuff follows. we include a magic number so we know whether
269 * to endianness convert or not
270 */
271struct blktrace_net_hdr {
272 u32 magic; /* same as trace magic */
22cd0c02 273 char buts_name[32]; /* trace name */
8e86c98a 274 u32 cpu; /* for which cpu */
22cd0c02 275 u32 max_cpus;
8e86c98a
JA
276 u32 len; /* length of following trace data */
277};
278
#define TRACE_NET_PORT	(8462)

/* values taken by net_mode */
enum {
	Net_none = 0,
	Net_server,
	Net_client,
};

/*
 * network cmd line params
 */
static char hostname[MAXHOSTNAMELEN];
static int net_port = TRACE_NET_PORT;
static int net_mode = Net_none;
static int net_use_sendfile;

/* sockets used by read_data_net() and write_data_net() callers */
static int net_in_fd = -1;
static int net_out_fd = -1;
297
298static void handle_sigint(__attribute__((__unused__)) int sig)
299{
7035d92d
JA
300 struct device_information *dip;
301 int i;
302
303 /*
304 * stop trace so we can reap currently produced data
305 */
306 for_each_dip(dip, i) {
921b05fe
JA
307 if (dip->fd == -1)
308 continue;
7035d92d
JA
309 if (ioctl(dip->fd, BLKTRACESTOP) < 0)
310 perror("BLKTRACESTOP");
311 }
312
8e86c98a
JA
313 done = 1;
314}
315
eb3c8108
JA
316static int get_dropped_count(const char *buts_name)
317{
318 int fd;
319 char tmp[MAXPATHLEN + 64];
320
321 snprintf(tmp, sizeof(tmp), "%s/block/%s/dropped",
322 relay_path, buts_name);
323
324 fd = open(tmp, O_RDONLY);
325 if (fd < 0) {
326 /*
327 * this may be ok, if the kernel doesn't support dropped counts
328 */
329 if (errno == ENOENT)
330 return 0;
331
332 fprintf(stderr, "Couldn't open dropped file %s\n", tmp);
333 return -1;
334 }
335
336 if (read(fd, tmp, sizeof(tmp)) < 0) {
337 perror(tmp);
338 close(fd);
339 return -1;
340 }
341
342 close(fd);
343
344 return atoi(tmp);
345}
346
e7c9f3ff 347static int start_trace(struct device_information *dip)
d0ca268b
JA
348{
349 struct blk_user_trace_setup buts;
350
1f79c4a0 351 memset(&buts, 0, sizeof(buts));
129aa440
JA
352 buts.buf_size = buf_size;
353 buts.buf_nr = buf_nr;
d39c04ca 354 buts.act_mask = act_mask;
d0ca268b 355
ed71a31e
JA
356 if (ioctl(dip->fd, BLKTRACESETUP, &buts) < 0) {
357 perror("BLKTRACESETUP");
358 return 1;
359 }
360
361 if (ioctl(dip->fd, BLKTRACESTART) < 0) {
362 perror("BLKTRACESTART");
d0ca268b
JA
363 return 1;
364 }
365
e7c9f3ff 366 memcpy(dip->buts_name, buts.name, sizeof(dip->buts_name));
99c1f5ab 367 dip_set_tracing(dip, 1);
d0ca268b
JA
368 return 0;
369}
370
e7c9f3ff 371static void stop_trace(struct device_information *dip)
d0ca268b 372{
99c1f5ab
JA
373 if (dip_tracing(dip) || kill_running_trace) {
374 dip_set_tracing(dip, 0);
cf9208ea 375
7035d92d
JA
376 /*
377 * should be stopped, just don't complain if it isn't
378 */
379 ioctl(dip->fd, BLKTRACESTOP);
380
ed71a31e
JA
381 if (ioctl(dip->fd, BLKTRACETEARDOWN) < 0)
382 perror("BLKTRACETEARDOWN");
cf9208ea 383
e7c9f3ff 384 close(dip->fd);
cf9208ea 385 dip->fd = -1;
707b0914 386 }
d0ca268b
JA
387}
388
e7c9f3ff
NS
389static void stop_all_traces(void)
390{
391 struct device_information *dip;
392 int i;
393
eb3c8108
JA
394 for_each_dip(dip, i) {
395 dip->drop_count = get_dropped_count(dip->buts_name);
e7c9f3ff 396 stop_trace(dip);
eb3c8108 397 }
e7c9f3ff
NS
398}
399
eb3c8108
JA
400static void wait_for_data(struct thread_information *tip)
401{
402 struct pollfd pfd = { .fd = tip->fd, .events = POLLIN };
403
9db17354 404 do {
b4aabcb3 405 poll(&pfd, 1, 100);
9db17354
JA
406 if (pfd.revents & POLLIN)
407 break;
408 if (tip->ofile_stdout)
409 break;
410 } while (!is_done());
eb3c8108
JA
411}
412
0cc7d25e
JA
413static int read_data_file(struct thread_information *tip, void *buf,
414 unsigned int len)
d0ca268b 415{
ae9f71b3 416 int ret = 0;
bbabf03a 417
9db17354
JA
418 do {
419 wait_for_data(tip);
ae9f71b3 420
9db17354
JA
421 ret = read(tip->fd, buf, len);
422 if (!ret)
423 continue;
424 else if (ret > 0)
425 return ret;
426 else {
bbabf03a 427 if (errno != EAGAIN) {
a3e4d330
JA
428 perror(tip->fn);
429 fprintf(stderr,"Thread %d failed read of %s\n",
430 tip->cpu, tip->fn);
431 break;
432 }
9db17354 433 continue;
bbabf03a 434 }
9db17354 435 } while (!is_done());
8a43bac5 436
bbabf03a 437 return ret;
8e86c98a 438
8a43bac5
JA
439}
440
0cc7d25e
JA
441static int read_data_net(struct thread_information *tip, void *buf,
442 unsigned int len)
8e86c98a
JA
443{
444 unsigned int bytes_left = len;
445 int ret = 0;
446
447 do {
448 ret = recv(net_in_fd, buf, bytes_left, MSG_WAITALL);
449
450 if (!ret)
451 continue;
452 else if (ret < 0) {
453 if (errno != EAGAIN) {
454 perror(tip->fn);
455 fprintf(stderr, "server: failed read\n");
456 return 0;
457 }
458 continue;
459 } else {
460 buf += ret;
461 bytes_left -= ret;
462 }
463 } while (!is_done() && bytes_left);
464
410d7c62 465 return len - bytes_left;
8e86c98a
JA
466}
467
0cc7d25e
JA
468static int read_data(struct thread_information *tip, void *buf,
469 unsigned int len)
8e86c98a 470{
663962f7 471 return tip->read_data(tip, buf, len);
8e86c98a
JA
472}
473
474static inline struct tip_subbuf *
475subbuf_fifo_dequeue(struct thread_information *tip)
a3e4d330 476{
21f55651
JA
477 const int head = tip->fifo.head;
478 const int next = (head + 1) & (FIFO_SIZE - 1);
479
480 if (head != tip->fifo.tail) {
481 struct tip_subbuf *ts = tip->fifo.q[head];
482
483 store_barrier();
484 tip->fifo.head = next;
485 return ts;
486 }
487
488 return NULL;
9db17354 489}
eb3c8108 490
21f55651
JA
491static inline int subbuf_fifo_queue(struct thread_information *tip,
492 struct tip_subbuf *ts)
9db17354 493{
21f55651
JA
494 const int tail = tip->fifo.tail;
495 const int next = (tail + 1) & (FIFO_SIZE - 1);
496
497 if (next != tip->fifo.head) {
498 tip->fifo.q[tail] = ts;
499 store_barrier();
500 tip->fifo.tail = next;
501 return 0;
502 }
503
504 fprintf(stderr, "fifo too small!\n");
505 return 1;
a3e4d330
JA
506}
507
b7106311
JA
508/*
509 * For file output, truncate and mmap the file appropriately
510 */
8e86c98a 511static int mmap_subbuf(struct thread_information *tip, unsigned int maxlen)
b7106311
JA
512{
513 int ofd = fileno(tip->ofile);
514 int ret;
515
516 /*
517 * extend file, if we have to. use chunks of 16 subbuffers.
518 */
519 if (tip->fs_off + buf_size > tip->fs_buf_len) {
520 if (tip->fs_buf) {
5975d309 521 munlock(tip->fs_buf, tip->fs_buf_len);
b7106311
JA
522 munmap(tip->fs_buf, tip->fs_buf_len);
523 tip->fs_buf = NULL;
524 }
525
526 tip->fs_off = tip->fs_size & (page_size - 1);
527 tip->fs_buf_len = (16 * buf_size) - tip->fs_off;
528 tip->fs_max_size += tip->fs_buf_len;
529
530 if (ftruncate(ofd, tip->fs_max_size) < 0) {
531 perror("ftruncate");
532 return -1;
533 }
534
535 tip->fs_buf = mmap(NULL, tip->fs_buf_len, PROT_WRITE,
536 MAP_SHARED, ofd, tip->fs_size - tip->fs_off);
537 if (tip->fs_buf == MAP_FAILED) {
538 perror("mmap");
539 return -1;
540 }
5975d309 541 mlock(tip->fs_buf, tip->fs_buf_len);
b7106311
JA
542 }
543
8e86c98a 544 ret = read_data(tip, tip->fs_buf + tip->fs_off, maxlen);
b7106311 545 if (ret >= 0) {
dbfbd6db 546 tip->data_read += ret;
b7106311
JA
547 tip->fs_size += ret;
548 tip->fs_off += ret;
549 return 0;
550 }
551
552 return -1;
553}
554
18eed2a7
JA
555/*
556 * Use the copy approach for pipes and network
557 */
558static int get_subbuf(struct thread_information *tip, unsigned int maxlen)
559{
560 struct tip_subbuf *ts = malloc(sizeof(*ts));
561 int ret;
562
563 ts->buf = malloc(buf_size);
564 ts->max_len = maxlen;
565
566 ret = read_data(tip, ts->buf, ts->max_len);
567 if (ret > 0) {
568 ts->len = ret;
dbfbd6db 569 tip->data_read += ret;
7035d92d
JA
570 if (subbuf_fifo_queue(tip, ts))
571 return -1;
18eed2a7
JA
572 }
573
574 return ret;
575}
576
32f18c48
JA
577static int get_subbuf_sendfile(struct thread_information *tip,
578 unsigned int maxlen)
579{
11eedd9b 580 struct tip_subbuf *ts;
32f18c48 581
18eed2a7
JA
582 wait_for_data(tip);
583
584 /*
585 * hack to get last data out, we can't use sendfile for that
586 */
587 if (is_done())
588 return get_subbuf(tip, maxlen);
589
11629347 590 if (tip->sendfile_pending) {
663962f7 591 usleep(100);
11eedd9b 592 return 0;
663962f7 593 }
11eedd9b 594
11629347
JA
595 ts = malloc(sizeof(*ts));
596 ts->buf = NULL;
597 ts->max_len = 0;
1be42f3d 598
11629347
JA
599 if (subbuf_fifo_queue(tip, ts))
600 return -1;
11eedd9b 601
11629347 602 tip->sendfile_pending++;
11629347 603 return buf_size;
32f18c48
JA
604}
605
9db17354 606static void close_thread(struct thread_information *tip)
a3e4d330 607{
9db17354
JA
608 if (tip->fd != -1)
609 close(tip->fd);
11eedd9b
JA
610 if (tip->pfd != -1)
611 close(tip->pfd);
9db17354
JA
612 if (tip->ofile)
613 fclose(tip->ofile);
614 if (tip->ofile_buffer)
615 free(tip->ofile_buffer);
616 if (tip->fd_buf)
617 free(tip->fd_buf);
11eedd9b
JA
618 if (tip->pfd_buf)
619 free(tip->pfd_buf);
1c99bc21 620
9db17354 621 tip->fd = -1;
11eedd9b 622 tip->pfd = -1;
9db17354
JA
623 tip->ofile = NULL;
624 tip->ofile_buffer = NULL;
625 tip->fd_buf = NULL;
a3e4d330
JA
626}
627
8e86c98a
JA
628static void tip_ftrunc_final(struct thread_information *tip)
629{
630 /*
631 * truncate to right size and cleanup mmap
632 */
633 if (tip->ofile_mmap) {
634 int ofd = fileno(tip->ofile);
635
636 if (tip->fs_buf)
637 munmap(tip->fs_buf, tip->fs_buf_len);
638
639 ftruncate(ofd, tip->fs_size);
640 }
641}
642
9db17354 643static void *thread_main(void *arg)
a3e4d330 644{
9db17354
JA
645 struct thread_information *tip = arg;
646 pid_t pid = getpid();
647 cpu_set_t cpu_mask;
a3e4d330 648
9db17354
JA
649 CPU_ZERO(&cpu_mask);
650 CPU_SET((tip->cpu), &cpu_mask);
a3e4d330 651
9db17354
JA
652 if (sched_setaffinity(pid, sizeof(cpu_mask), &cpu_mask) == -1) {
653 perror("sched_setaffinity");
654 exit_trace(1);
655 }
a3e4d330 656
9db17354
JA
657 snprintf(tip->fn, sizeof(tip->fn), "%s/block/%s/trace%d",
658 relay_path, tip->device->buts_name, tip->cpu);
659 tip->fd = open(tip->fn, O_RDONLY);
660 if (tip->fd < 0) {
661 perror(tip->fn);
662 fprintf(stderr,"Thread %d failed open of %s\n", tip->cpu,
663 tip->fn);
664 exit_trace(1);
a3e4d330
JA
665 }
666
f6fead25 667 if (net_mode == Net_client && net_use_sendfile) {
11eedd9b
JA
668 char tmp[MAXPATHLEN + 64];
669
670 snprintf(tmp, sizeof(tmp), "%s/block/%s/trace%d.padding",
671 relay_path, tip->device->buts_name, tip->cpu);
672
673 tip->pfd = open(tmp, O_RDONLY);
674 if (tip->pfd < 0) {
675 fprintf(stderr, "Couldn't open padding file %s\n", tmp);
676 exit_trace(1);
677 }
678
679 tip->pfd_buf = malloc(buf_nr * sizeof(size_t));
680 }
681
b7106311 682 while (!is_done()) {
7035d92d 683 if (tip->get_subbuf(tip, buf_size) < 0)
0cc7d25e 684 break;
b7106311
JA
685 }
686
7035d92d
JA
687 /*
688 * trace is stopped, pull data until we get a short read
689 */
690 while (tip->get_subbuf(tip, buf_size) > 0)
691 ;
692
8e86c98a
JA
693 tip_ftrunc_final(tip);
694 tip->exited = 1;
695 return NULL;
696}
b7106311 697
8e86c98a
JA
/*
 * Push all @buf_len bytes of @buf out on socket @fd, looping over partial
 * sends. Returns 0 on success, 1 if send() fails.
 */
static int write_data_net(int fd, void *buf, unsigned int buf_len)
{
	unsigned int remaining = buf_len;
	char *cursor = buf;

	while (remaining) {
		int sent = send(fd, cursor, remaining, 0);

		if (sent < 0) {
			perror("send");
			return 1;
		}

		cursor += sent;
		remaining -= sent;
	}

	return 0;
}
716
32f18c48 717static int net_send_header(struct thread_information *tip, unsigned int len)
8e86c98a
JA
718{
719 struct blktrace_net_hdr hdr;
8e86c98a
JA
720
721 hdr.magic = BLK_IO_TRACE_MAGIC;
22cd0c02 722 strcpy(hdr.buts_name, tip->device->buts_name);
8e86c98a 723 hdr.cpu = tip->cpu;
22cd0c02 724 hdr.max_cpus = ncpus;
32f18c48 725 hdr.len = len;
8e86c98a 726
32f18c48
JA
727 return write_data_net(net_out_fd, &hdr, sizeof(hdr));
728}
8e86c98a 729
6a752c90
JA
730/*
731 * send header with 0 length to signal end-of-run
732 */
733static void net_client_send_close(void)
734{
735 struct blktrace_net_hdr hdr;
736
737 hdr.magic = BLK_IO_TRACE_MAGIC;
738 hdr.cpu = 0;
739 hdr.max_cpus = ncpus;
740 hdr.len = 0;
741
742 write_data_net(net_out_fd, &hdr, sizeof(hdr));
743}
744
32f18c48
JA
745static int flush_subbuf_net(struct thread_information *tip,
746 struct tip_subbuf *ts)
747{
748 if (net_send_header(tip, ts->len))
749 return 1;
22cd0c02
JA
750 if (write_data_net(net_out_fd, ts->buf, ts->len))
751 return 1;
8e86c98a 752
f0597a7e 753 free(ts->buf);
8e86c98a
JA
754 free(ts);
755 return 0;
756}
757
f6fead25
JA
758static int net_sendfile(struct thread_information *tip, struct tip_subbuf *ts)
759{
11629347
JA
760 int ret = sendfile(net_out_fd, tip->fd, NULL, ts->len);
761
762 if (ret < 0) {
763 perror("sendfile");
764 return 1;
765 } else if (ret < (int) ts->len) {
766 fprintf(stderr, "short sendfile send (%d of %d)\n", ret, ts->len);
767 return 1;
768 }
769
770 return 0;
771}
772
773static int get_subbuf_padding(struct thread_information *tip, off_t off)
774{
775 int padding_size = buf_nr * sizeof(size_t);
f6fead25
JA
776 int ret;
777
11629347
JA
778 ret = read(tip->pfd, tip->pfd_buf, padding_size);
779 if (ret == padding_size) {
780 int subbuf = (off / buf_size) % buf_nr;
f6fead25 781
11629347
JA
782 ret = tip->pfd_buf[subbuf];
783 } else if (ret < 0)
784 perror("tip pad read");
785 else {
786 fprintf(stderr, "bad pad size read\n");
787 ret = -1;
f6fead25
JA
788 }
789
11629347 790 return ret;
f6fead25
JA
791}
792
32f18c48
JA
793static int flush_subbuf_sendfile(struct thread_information *tip,
794 struct tip_subbuf *ts)
795{
11629347 796 int pad, ret = 1;
18eed2a7
JA
797
798 /*
799 * currently we cannot use sendfile() on the last bytes read, as they
800 * may not be a full subbuffer. get_subbuf_sendfile() falls back to
801 * the read approach for those, so use send() to ship them out
802 */
803 if (ts->buf)
804 return flush_subbuf_net(tip, ts);
11eedd9b 805
e076d33b 806 pad = get_subbuf_padding(tip, tip->ofile_offset);
11629347
JA
807 if (pad == -1)
808 goto err;
809
810 ts->len = buf_size - pad;
11eedd9b 811
f6fead25 812 if (net_send_header(tip, ts->len))
11629347 813 goto err;
f6fead25 814 if (net_sendfile(tip, ts))
11629347 815 goto err;
32f18c48 816
f6fead25 817 tip->data_read += ts->len;
e076d33b
JA
818 tip->ofile_offset += buf_size;
819 ret = 0;
11629347
JA
820err:
821 tip->sendfile_pending--;
32f18c48 822 free(ts);
11629347 823 return ret;
32f18c48
JA
824}
825
8e86c98a
JA
826static int write_data(struct thread_information *tip, void *buf,
827 unsigned int buf_len)
8a43bac5 828{
7126171a 829 int ret;
8a43bac5 830
6480258a
JA
831 if (!buf_len)
832 return 0;
833
7126171a
JA
834 while (1) {
835 ret = fwrite(buf, buf_len, 1, tip->ofile);
007c233c 836 if (ret == 1)
8a43bac5
JA
837 break;
838
db6fe5bc
JA
839 if (ret < 0) {
840 perror("write");
841 return 1;
8a43bac5 842 }
d0ca268b
JA
843 }
844
9db17354 845 if (tip->ofile_stdout)
7126171a
JA
846 fflush(tip->ofile);
847
8a43bac5
JA
848 return 0;
849}
850
8e86c98a
JA
851static int flush_subbuf_file(struct thread_information *tip,
852 struct tip_subbuf *ts)
8a43bac5 853{
9db17354
JA
854 unsigned int offset = 0;
855 struct blk_io_trace *t;
856 int pdu_len, events = 0;
8a43bac5 857
9db17354 858 /*
7de86b12 859 * surplus from last run
9db17354 860 */
7de86b12
AB
861 if (tip->leftover_ts) {
862 struct tip_subbuf *prev_ts = tip->leftover_ts;
863
9e8b753c 864 if (prev_ts->len + ts->len > prev_ts->max_len) {
7de86b12
AB
865 prev_ts->max_len += ts->len;
866 prev_ts->buf = realloc(prev_ts->buf, prev_ts->max_len);
867 }
868
9e8b753c 869 memcpy(prev_ts->buf + prev_ts->len, ts->buf, ts->len);
7de86b12
AB
870 prev_ts->len += ts->len;
871
872 free(ts->buf);
873 free(ts);
874
875 ts = prev_ts;
876 tip->leftover_ts = NULL;
9db17354 877 }
d0ca268b 878
9db17354
JA
879 while (offset + sizeof(*t) <= ts->len) {
880 t = ts->buf + offset;
3a9d6c13 881
9cfa6c2b
AB
882 if (verify_trace(t)) {
883 write_data(tip, ts->buf, offset);
9db17354 884 return -1;
9cfa6c2b 885 }
3a9d6c13 886
9db17354 887 pdu_len = t->pdu_len;
3a9d6c13 888
9db17354 889 if (offset + sizeof(*t) + pdu_len > ts->len)
3a9d6c13 890 break;
4b5db44a 891
9db17354
JA
892 offset += sizeof(*t) + pdu_len;
893 tip->events_processed++;
b7106311 894 tip->data_read += sizeof(*t) + pdu_len;
9db17354 895 events++;
3a9d6c13
JA
896 }
897
9cfa6c2b
AB
898 if (write_data(tip, ts->buf, offset))
899 return -1;
900
3a9d6c13 901 /*
9db17354 902 * leftover bytes, save them for next time
3a9d6c13 903 */
9db17354 904 if (offset != ts->len) {
7de86b12 905 tip->leftover_ts = ts;
9e8b753c
JA
906 ts->len -= offset;
907 memmove(ts->buf, ts->buf + offset, ts->len);
7de86b12
AB
908 } else {
909 free(ts->buf);
910 free(ts);
9db17354 911 }
4b5db44a 912
9db17354 913 return events;
4b5db44a
JA
914}
915
9db17354 916static int write_tip_events(struct thread_information *tip)
d5396421 917{
21f55651 918 struct tip_subbuf *ts = subbuf_fifo_dequeue(tip);
d5396421 919
0cc7d25e
JA
920 if (ts)
921 return tip->flush_subbuf(tip, ts);
91816d54 922
9db17354 923 return 0;
91816d54
JA
924}
925
9db17354
JA
926/*
927 * scans the tips we know and writes out the subbuffers we accumulate
928 */
929static void get_and_write_events(void)
d0ca268b 930{
9db17354
JA
931 struct device_information *dip;
932 struct thread_information *tip;
27223f19 933 int i, j, events, ret, tips_running;
d0ca268b 934
9db17354
JA
935 while (!is_done()) {
936 events = 0;
d0ca268b 937
9db17354
JA
938 for_each_dip(dip, i) {
939 for_each_tip(dip, tip, j) {
940 ret = write_tip_events(tip);
941 if (ret > 0)
942 events += ret;
943 }
944 }
d0ca268b 945
9db17354
JA
946 if (!events)
947 usleep(10);
d0ca268b
JA
948 }
949
a3e4d330 950 /*
9db17354 951 * reap stored events
a3e4d330 952 */
9db17354
JA
953 do {
954 events = 0;
27223f19 955 tips_running = 0;
9db17354
JA
956 for_each_dip(dip, i) {
957 for_each_tip(dip, tip, j) {
958 ret = write_tip_events(tip);
959 if (ret > 0)
960 events += ret;
27223f19 961 tips_running += !tip->exited;
9db17354 962 }
69e65a9e 963 }
9db17354 964 usleep(10);
27223f19 965 } while (events || tips_running);
d0ca268b
JA
966}
967
b7106311
JA
968static void wait_for_threads(void)
969{
970 /*
8e86c98a
JA
971 * for piped or network output, poll and fetch data for writeout.
972 * for files, we just wait around for trace threads to exit
b7106311 973 */
8e86c98a
JA
974 if ((output_name && !strcmp(output_name, "-")) ||
975 net_mode == Net_client)
b7106311
JA
976 get_and_write_events();
977 else {
978 struct device_information *dip;
979 struct thread_information *tip;
980 int i, j, tips_running;
981
982 do {
983 tips_running = 0;
984 usleep(1000);
985
986 for_each_dip(dip, i)
987 for_each_tip(dip, tip, j)
988 tips_running += !tip->exited;
989 } while (tips_running);
990 }
6a752c90
JA
991
992 if (net_mode == Net_client)
993 net_client_send_close();
b7106311
JA
994}
995
e3bf54d8
JA
996static int fill_ofname(struct thread_information *tip, char *dst,
997 char *buts_name)
8e86c98a 998{
e3bf54d8 999 struct stat sb;
8e86c98a 1000 int len = 0;
e3bf54d8 1001 time_t t;
8e86c98a
JA
1002
1003 if (output_dir)
1004 len = sprintf(dst, "%s/", output_dir);
1005
e3bf54d8
JA
1006 if (net_mode == Net_server) {
1007 len += sprintf(dst + len, "%s-", inet_ntoa(tip->cl_in_addr));
1008 time(&t);
1009 len += strftime(dst + len, 64, "%F-%T/", gmtime(&t));
1010 }
1011
1012 if (stat(dst, &sb) < 0) {
1013 if (errno != ENOENT) {
1014 perror("stat");
1015 return 1;
1016 }
1017 if (mkdir(dst, 0755) < 0) {
1018 perror(dst);
1019 fprintf(stderr, "Can't make output dir\n");
1020 return 1;
1021 }
1022 }
1023
8e86c98a 1024 if (output_name)
e3bf54d8 1025 sprintf(dst + len, "%s.blktrace.%d", output_name, tip->cpu);
8e86c98a 1026 else
e3bf54d8
JA
1027 sprintf(dst + len, "%s.blktrace.%d", buts_name, tip->cpu);
1028
1029 return 0;
8e86c98a
JA
1030}
1031
0cc7d25e
JA
1032static void fill_ops(struct thread_information *tip)
1033{
1034 /*
1035 * setup ops
1036 */
32f18c48 1037 if (net_mode == Net_client) {
36808255 1038 if (net_use_sendfile) {
32f18c48
JA
1039 tip->get_subbuf = get_subbuf_sendfile;
1040 tip->flush_subbuf = flush_subbuf_sendfile;
1041 } else {
1042 tip->get_subbuf = get_subbuf;
1043 tip->flush_subbuf = flush_subbuf_net;
1044 }
1045 } else {
1046 if (tip->ofile_mmap)
1047 tip->get_subbuf = mmap_subbuf;
1048 else
1049 tip->get_subbuf = get_subbuf;
0cc7d25e 1050
0cc7d25e 1051 tip->flush_subbuf = flush_subbuf_file;
32f18c48
JA
1052 }
1053
0cc7d25e
JA
1054 if (net_mode == Net_server)
1055 tip->read_data = read_data_net;
1056 else
1057 tip->read_data = read_data_file;
1058}
1059
ddf22842
JA
1060static int tip_open_output(struct device_information *dip,
1061 struct thread_information *tip)
d0ca268b 1062{
ddf22842 1063 int pipeline = output_name && !strcmp(output_name, "-");
8e86c98a 1064 int mode, vbuf_size;
e3bf54d8 1065 char op[128];
d0ca268b 1066
ddf22842
JA
1067 if (net_mode == Net_client) {
1068 tip->ofile = NULL;
1069 tip->ofile_stdout = 0;
1070 tip->ofile_mmap = 0;
0c0b75b4 1071 goto done;
ddf22842
JA
1072 } else if (pipeline) {
1073 tip->ofile = fdopen(STDOUT_FILENO, "w");
1074 tip->ofile_stdout = 1;
1075 tip->ofile_mmap = 0;
1076 mode = _IOLBF;
1077 vbuf_size = 512;
1078 } else {
e3bf54d8
JA
1079 if (fill_ofname(tip, op, dip->buts_name))
1080 return 1;
ddf22842
JA
1081 tip->ofile = fopen(op, "w+");
1082 tip->ofile_stdout = 0;
1083 tip->ofile_mmap = 1;
1084 mode = _IOFBF;
1085 vbuf_size = OFILE_BUF;
1086 }
d5396421 1087
0c0b75b4 1088 if (tip->ofile == NULL) {
ddf22842
JA
1089 perror(op);
1090 return 1;
1091 }
d5396421 1092
0c0b75b4
JA
1093 tip->ofile_buffer = malloc(vbuf_size);
1094 if (setvbuf(tip->ofile, tip->ofile_buffer, mode, vbuf_size)) {
1095 perror("setvbuf");
1096 close_thread(tip);
1097 return 1;
ddf22842
JA
1098 }
1099
0c0b75b4 1100done:
ddf22842
JA
1101 fill_ops(tip);
1102 return 0;
1103}
007c233c 1104
ddf22842
JA
1105static int start_threads(struct device_information *dip)
1106{
1107 struct thread_information *tip;
1108 int j;
1109
1110 for_each_tip(dip, tip, j) {
1111 tip->cpu = j;
1112 tip->device = dip;
1113 tip->events_processed = 0;
11eedd9b
JA
1114 tip->fd = -1;
1115 tip->pfd = -1;
ddf22842
JA
1116 memset(&tip->fifo, 0, sizeof(tip->fifo));
1117 tip->leftover_ts = NULL;
1118
1119 if (tip_open_output(dip, tip))
1120 return 1;
0cc7d25e 1121
9db17354 1122 if (pthread_create(&tip->thread, NULL, thread_main, tip)) {
e7c9f3ff 1123 perror("pthread_create");
007c233c 1124 close_thread(tip);
e7c9f3ff 1125 return 1;
d0ca268b
JA
1126 }
1127 }
1128
e7c9f3ff 1129 return 0;
d0ca268b
JA
1130}
1131
e7c9f3ff 1132static void stop_threads(struct device_information *dip)
3aabcd89 1133{
e7c9f3ff 1134 struct thread_information *tip;
91816d54 1135 unsigned long ret;
007c233c
JA
1136 int i;
1137
9db17354 1138 for_each_tip(dip, tip, i) {
91816d54 1139 (void) pthread_join(tip->thread, (void *) &ret);
9db17354
JA
1140 close_thread(tip);
1141 }
3aabcd89
JA
1142}
1143
e7c9f3ff 1144static void stop_all_threads(void)
72ca8801 1145{
e7c9f3ff 1146 struct device_information *dip;
72ca8801
NS
1147 int i;
1148
99c1f5ab 1149 for_each_dip(dip, i)
e7c9f3ff
NS
1150 stop_threads(dip);
1151}
1152
1153static void stop_all_tracing(void)
1154{
1155 struct device_information *dip;
91816d54 1156 int i;
007c233c 1157
91816d54 1158 for_each_dip(dip, i)
e7c9f3ff 1159 stop_trace(dip);
72ca8801
NS
1160}
1161
1162static void exit_trace(int status)
1163{
eb3c8108
JA
1164 if (!is_trace_stopped()) {
1165 trace_stopped = 1;
1166 stop_all_threads();
1167 stop_all_tracing();
1168 }
1169
72ca8801
NS
1170 exit(status);
1171}
1172
e7c9f3ff
NS
1173static int resize_devices(char *path)
1174{
1175 int size = (ndevs + 1) * sizeof(struct device_information);
1176
1177 device_information = realloc(device_information, size);
1178 if (!device_information) {
1179 fprintf(stderr, "Out of memory, device %s (%d)\n", path, size);
1180 return 1;
1181 }
1182 device_information[ndevs].path = path;
1183 ndevs++;
1184 return 0;
1185}
1186
1187static int open_devices(void)
d0ca268b 1188{
e7c9f3ff 1189 struct device_information *dip;
d0ca268b 1190 int i;
d0ca268b 1191
99c1f5ab 1192 for_each_dip(dip, i) {
cf9208ea 1193 dip->fd = open(dip->path, O_RDONLY | O_NONBLOCK);
e7c9f3ff
NS
1194 if (dip->fd < 0) {
1195 perror(dip->path);
1196 return 1;
1197 }
1198 }
99c1f5ab 1199
e7c9f3ff
NS
1200 return 0;
1201}
1202
1203static int start_devices(void)
1204{
1205 struct device_information *dip;
1206 int i, j, size;
1207
1208 size = ncpus * sizeof(struct thread_information);
1209 thread_information = malloc(size * ndevs);
1210 if (!thread_information) {
1211 fprintf(stderr, "Out of memory, threads (%d)\n", size * ndevs);
1212 return 1;
1213 }
d5396421 1214
99c1f5ab 1215 for_each_dip(dip, i) {
e7c9f3ff
NS
1216 if (start_trace(dip)) {
1217 close(dip->fd);
1218 fprintf(stderr, "Failed to start trace on %s\n",
1219 dip->path);
1220 break;
1221 }
1222 }
99c1f5ab 1223
e7c9f3ff 1224 if (i != ndevs) {
99c1f5ab 1225 __for_each_dip(dip, j, i)
e7c9f3ff 1226 stop_trace(dip);
99c1f5ab 1227
e7c9f3ff
NS
1228 return 1;
1229 }
1230
99c1f5ab 1231 for_each_dip(dip, i) {
e7c9f3ff
NS
1232 dip->threads = thread_information + (i * ncpus);
1233 if (start_threads(dip)) {
1234 fprintf(stderr, "Failed to start worker threads\n");
1235 break;
1236 }
1237 }
99c1f5ab 1238
e7c9f3ff 1239 if (i != ndevs) {
99c1f5ab 1240 __for_each_dip(dip, j, i)
e7c9f3ff 1241 stop_threads(dip);
99c1f5ab 1242 for_each_dip(dip, i)
e7c9f3ff 1243 stop_trace(dip);
99c1f5ab 1244
e7c9f3ff 1245 return 1;
d0ca268b
JA
1246 }
1247
e7c9f3ff 1248 return 0;
d0ca268b
JA
1249}
1250
e7c9f3ff
NS
1251static void show_stats(void)
1252{
e7c9f3ff
NS
1253 struct device_information *dip;
1254 struct thread_information *tip;
b7106311 1255 unsigned long long events_processed, data_read;
eb3c8108 1256 unsigned long total_drops;
2f903295 1257 int i, j, no_stdout = 0;
eb3c8108
JA
1258
1259 if (is_stat_shown())
1260 return;
1261
2f903295
JA
1262 if (output_name && !strcmp(output_name, "-"))
1263 no_stdout = 1;
1264
eb3c8108 1265 stat_shown = 1;
428683db 1266
56070ea4 1267 total_drops = 0;
99c1f5ab 1268 for_each_dip(dip, i) {
2f903295 1269 if (!no_stdout)
56070ea4 1270 printf("Device: %s\n", dip->path);
e7c9f3ff 1271 events_processed = 0;
b7106311 1272 data_read = 0;
99c1f5ab 1273 for_each_tip(dip, tip, j) {
2f903295 1274 if (!no_stdout)
b7106311
JA
1275 printf(" CPU%3d: %20lu events, %8llu KiB data\n",
1276 tip->cpu, tip->events_processed,
54824c20 1277 (tip->data_read + 1023) >> 10);
e7c9f3ff 1278 events_processed += tip->events_processed;
b7106311 1279 data_read += tip->data_read;
e7c9f3ff 1280 }
eb3c8108 1281 total_drops += dip->drop_count;
2f903295 1282 if (!no_stdout)
b7106311
JA
1283 printf(" Total: %20llu events (dropped %lu), %8llu KiB data\n",
1284 events_processed, dip->drop_count,
18d8437d 1285 (data_read + 1023) >> 10);
e7c9f3ff 1286 }
56070ea4
JA
1287
1288 if (total_drops)
1289 fprintf(stderr, "You have dropped events, consider using a larger buffer size (-b)\n");
e7c9f3ff 1290}
52724a0e 1291
e3bf54d8
JA
1292static struct device_information *net_get_dip(char *buts_name,
1293 struct in_addr *cl_in_addr)
8e86c98a 1294{
22cd0c02 1295 struct device_information *dip;
8e86c98a
JA
1296 int i;
1297
22cd0c02
JA
1298 for (i = 0; i < ndevs; i++) {
1299 dip = &device_information[i];
8e86c98a 1300
22cd0c02
JA
1301 if (!strcmp(dip->buts_name, buts_name))
1302 return dip;
8e86c98a
JA
1303 }
1304
22cd0c02
JA
1305 device_information = realloc(device_information, (ndevs + 1) * sizeof(*dip));
1306 dip = &device_information[ndevs];
921b05fe
JA
1307 memset(dip, 0, sizeof(*dip));
1308 dip->fd = -1;
22cd0c02 1309 strcpy(dip->buts_name, buts_name);
921b05fe 1310 dip->path = strdup(buts_name);
22cd0c02
JA
1311 ndevs++;
1312 dip->threads = malloc(ncpus * sizeof(struct thread_information));
1313 memset(dip->threads, 0, ncpus * sizeof(struct thread_information));
1314
1315 /*
1316 * open all files
1317 */
1318 for (i = 0; i < ncpus; i++) {
1319 struct thread_information *tip = &dip->threads[i];
8e86c98a 1320
22cd0c02 1321 tip->cpu = i;
22cd0c02 1322 tip->device = dip;
1366e53a
JA
1323 tip->fd = -1;
1324 tip->pfd = -1;
e3bf54d8 1325 tip->cl_in_addr = *cl_in_addr;
8e86c98a 1326
ddf22842 1327 if (tip_open_output(dip, tip))
22cd0c02 1328 return NULL;
8e86c98a
JA
1329 }
1330
22cd0c02
JA
1331 return dip;
1332}
1333
e3bf54d8
JA
1334static struct thread_information *net_get_tip(struct blktrace_net_hdr *bnh,
1335 struct in_addr *cl_in_addr)
22cd0c02
JA
1336{
1337 struct device_information *dip;
1338
1339 ncpus = bnh->max_cpus;
e3bf54d8 1340 dip = net_get_dip(bnh->buts_name, cl_in_addr);
22cd0c02 1341 return &dip->threads[bnh->cpu];
8e86c98a
JA
1342}
1343
1344static int net_get_header(struct blktrace_net_hdr *bnh)
1345{
1346 int fl = fcntl(net_in_fd, F_GETFL);
1347 int bytes_left, ret;
1348 void *p = bnh;
1349
1350 fcntl(net_in_fd, F_SETFL, fl | O_NONBLOCK);
1351 bytes_left = sizeof(*bnh);
1352 while (bytes_left && !is_done()) {
1353 ret = recv(net_in_fd, p, bytes_left, MSG_WAITALL);
1354 if (ret < 0) {
1355 if (errno != EAGAIN) {
1356 perror("recv header");
1357 return 1;
1358 }
1359 usleep(100);
1360 continue;
1361 } else if (!ret) {
1362 usleep(100);
1363 continue;
1364 } else {
1365 p += ret;
1366 bytes_left -= ret;
1367 }
1368 }
1369 fcntl(net_in_fd, F_SETFL, fl & ~O_NONBLOCK);
227f89ff 1370 return bytes_left;
8e86c98a
JA
1371}
1372
e3bf54d8 1373static int net_server_loop(struct in_addr *cl_in_addr)
8e86c98a
JA
1374{
1375 struct thread_information *tip;
1376 struct blktrace_net_hdr bnh;
1377
1378 if (net_get_header(&bnh))
1379 return 1;
1380
1381 if (data_is_native == -1 && check_data_endianness(bnh.magic)) {
1382 fprintf(stderr, "server: received data is bad\n");
1383 return 1;
1384 }
1385
1386 if (!data_is_native) {
227f89ff 1387 bnh.magic = be32_to_cpu(bnh.magic);
8e86c98a
JA
1388 bnh.cpu = be32_to_cpu(bnh.cpu);
1389 bnh.len = be32_to_cpu(bnh.len);
1390 }
1391
227f89ff
JA
1392 if ((bnh.magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) {
1393 fprintf(stderr, "server: bad data magic\n");
1394 return 1;
1395 }
1396
6a752c90
JA
1397 /*
1398 * len == 0 means that the other end signalled end-of-run
1399 */
1400 if (!bnh.len) {
1401 fprintf(stderr, "server: end of run\n");
1402 return 1;
1403 }
1404
e3bf54d8 1405 tip = net_get_tip(&bnh, cl_in_addr);
8e86c98a
JA
1406 if (!tip)
1407 return 1;
1408
1409 if (mmap_subbuf(tip, bnh.len))
1410 return 1;
1411
1412 return 0;
1413}
1414
1415/*
1416 * Start here when we are in server mode - just fetch data from the network
1417 * and dump to files
1418 */
1419static int net_server(void)
1420{
898bbd3b
JA
1421 struct device_information *dip;
1422 struct thread_information *tip;
8e86c98a
JA
1423 struct sockaddr_in addr;
1424 socklen_t socklen;
22cd0c02 1425 int fd, opt, i, j;
8e86c98a
JA
1426
1427 fd = socket(AF_INET, SOCK_STREAM, 0);
1428 if (fd < 0) {
1429 perror("server: socket");
1430 return 1;
1431 }
1432
1433 opt = 1;
1434 if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) < 0) {
1435 perror("setsockopt");
1436 return 1;
1437 }
1438
1439 memset(&addr, 0, sizeof(addr));
1440 addr.sin_family = AF_INET;
1441 addr.sin_addr.s_addr = htonl(INADDR_ANY);
1442 addr.sin_port = htons(net_port);
1443
1444 if (bind(fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
1445 perror("bind");
1446 return 1;
1447 }
1448
1449 if (listen(fd, 1) < 0) {
1450 perror("listen");
1451 return 1;
1452 }
1453
6a752c90 1454repeat:
bbb1c18a
JA
1455 signal(SIGINT, NULL);
1456 signal(SIGHUP, NULL);
1457 signal(SIGTERM, NULL);
1458 signal(SIGALRM, NULL);
1459
8e86c98a
JA
1460 printf("blktrace: waiting for incoming connection...\n");
1461
1462 socklen = sizeof(addr);
1463 net_in_fd = accept(fd, (struct sockaddr *) &addr, &socklen);
1464 if (net_in_fd < 0) {
1465 perror("accept");
1466 return 1;
1467 }
1468
1469 signal(SIGINT, handle_sigint);
1470 signal(SIGHUP, handle_sigint);
1471 signal(SIGTERM, handle_sigint);
1472 signal(SIGALRM, handle_sigint);
1473
e85ee0f2 1474 printf("blktrace: connection from %s\n", inet_ntoa(addr.sin_addr));
8e86c98a
JA
1475
1476 while (!is_done()) {
e3bf54d8 1477 if (net_server_loop(&addr.sin_addr))
8e86c98a
JA
1478 break;
1479 }
1480
898bbd3b
JA
1481 for_each_dip(dip, i)
1482 for_each_tip(dip, tip, j)
1483 tip_ftrunc_final(tip);
8e86c98a 1484
410d7c62 1485 show_stats();
6a752c90
JA
1486
1487 if (is_done())
1488 return 0;
1489
898bbd3b
JA
1490 /*
1491 * cleanup for next run
1492 */
1493 for_each_dip(dip, i) {
1494 for_each_tip(dip, tip, j)
1495 fclose(tip->ofile);
1496
1497 free(dip->threads);
921b05fe 1498 free(dip->path);
898bbd3b
JA
1499 }
1500
1501 free(device_information);
1502 device_information = NULL;
1503 ncpus = ndevs = 0;
b46a0342
JA
1504
1505 close(net_in_fd);
1506 net_in_fd = -1;
dbfbd6db 1507 stat_shown = 0;
6a752c90 1508 goto repeat;
8e86c98a
JA
1509}
1510
1511/*
1512 * Setup outgoing network connection where we will transmit data
1513 */
1514static int net_setup_client(void)
1515{
1516 struct sockaddr_in addr;
1517 int fd;
1518
1519 fd = socket(AF_INET, SOCK_STREAM, 0);
1520 if (fd < 0) {
1521 perror("client: socket");
1522 return 1;
1523 }
1524
1525 memset(&addr, 0, sizeof(addr));
1526 addr.sin_family = AF_INET;
1527 addr.sin_port = htons(net_port);
1528
1529 if (inet_aton(hostname, &addr.sin_addr) != 1) {
1530 struct hostent *hent = gethostbyname(hostname);
1531 if (!hent) {
1532 perror("gethostbyname");
1533 return 1;
1534 }
1535
1536 memcpy(&addr.sin_addr, hent->h_addr, 4);
1537 strcpy(hostname, hent->h_name);
1538 }
1539
1540 printf("blktrace: connecting to %s\n", hostname);
1541
1542 if (connect(fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
1543 perror("client: connect");
1544 return 1;
1545 }
1546
1547 printf("blktrace: connected!\n");
1548 net_out_fd = fd;
1549 return 0;
1550}
1551
52724a0e
JA
/*
 * Option summary printed by show_usage(). Adjacent string literals are
 * concatenated by the compiler, so no line continuations are needed.
 */
static char usage_str[] =
	"-d <dev> [ -r relay path ] [ -o <output> ] [-k ] [ -w time ]\n"
	"[ -a action ] [ -A action mask ] [ -v ]\n\n"
	"\t-d Use specified device. May also be given last after options\n"
	"\t-r Path to mounted relayfs, defaults to /relay\n"
	"\t-o File(s) to send output to\n"
	"\t-D Directory to prepend to output file names\n"
	"\t-k Kill a running trace\n"
	"\t-w Stop after defined time, in seconds\n"
	"\t-a Only trace specified actions. See documentation\n"
	"\t-A Give trace mask as a single value. See documentation\n"
	"\t-b Sub buffer size in KiB\n"
	"\t-n Number of sub buffers\n"
	"\t-l Run in network listen mode (blktrace server)\n"
	"\t-h Run in network client mode, connecting to the given host\n"
	"\t-p Network port to use (default 8462)\n"
	"\t-s Make the network client use sendfile() to transfer data\n"
	"\t-V Print program version info\n\n";
52724a0e 1570
ee1f4158
NS
1571static void show_usage(char *program)
1572{
52724a0e 1573 fprintf(stderr, "Usage: %s %s %s",program, blktrace_version, usage_str);
ee1f4158 1574}
d0ca268b
JA
1575
1576int main(int argc, char *argv[])
1577{
5270dddd 1578 static char default_relay_path[] = "/relay";
e3e74029 1579 struct statfs st;
d39c04ca 1580 int i, c;
ece238a6 1581 int stop_watch = 0;
d39c04ca
AB
1582 int act_mask_tmp = 0;
1583
1584 while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) >= 0) {
1585 switch (c) {
1586 case 'a':
1587 i = find_mask_map(optarg);
1588 if (i < 0) {
ab197ca7 1589 fprintf(stderr,"Invalid action mask %s\n",
d39c04ca 1590 optarg);
7425d456 1591 return 1;
d39c04ca
AB
1592 }
1593 act_mask_tmp |= i;
1594 break;
1595
1596 case 'A':
98f8386b
AB
1597 if ((sscanf(optarg, "%x", &i) != 1) ||
1598 !valid_act_opt(i)) {
d39c04ca 1599 fprintf(stderr,
ab197ca7 1600 "Invalid set action mask %s/0x%x\n",
d39c04ca 1601 optarg, i);
7425d456 1602 return 1;
d39c04ca
AB
1603 }
1604 act_mask_tmp = i;
1605 break;
d0ca268b 1606
d39c04ca 1607 case 'd':
e7c9f3ff
NS
1608 if (resize_devices(optarg) != 0)
1609 return 1;
d39c04ca
AB
1610 break;
1611
5270dddd
JA
1612 case 'r':
1613 relay_path = optarg;
1614 break;
1615
d5396421 1616 case 'o':
66efebf8 1617 output_name = optarg;
d5396421 1618 break;
bc39777c
JA
1619 case 'k':
1620 kill_running_trace = 1;
1621 break;
ece238a6
NS
1622 case 'w':
1623 stop_watch = atoi(optarg);
1624 if (stop_watch <= 0) {
1625 fprintf(stderr,
1626 "Invalid stopwatch value (%d secs)\n",
1627 stop_watch);
1628 return 1;
1629 }
1630 break;
57ea8602 1631 case 'V':
52724a0e
JA
1632 printf("%s version %s\n", argv[0], blktrace_version);
1633 return 0;
129aa440 1634 case 'b':
eb3c8108 1635 buf_size = strtoul(optarg, NULL, 10);
183a0855 1636 if (buf_size <= 0 || buf_size > 16*1024) {
129aa440 1637 fprintf(stderr,
eb3c8108 1638 "Invalid buffer size (%lu)\n",buf_size);
129aa440
JA
1639 return 1;
1640 }
1641 buf_size <<= 10;
1642 break;
1643 case 'n':
eb3c8108 1644 buf_nr = strtoul(optarg, NULL, 10);
129aa440
JA
1645 if (buf_nr <= 0) {
1646 fprintf(stderr,
eb3c8108 1647 "Invalid buffer nr (%lu)\n", buf_nr);
129aa440
JA
1648 return 1;
1649 }
1650 break;
d1d7f15f
JA
1651 case 'D':
1652 output_dir = optarg;
1653 break;
8e86c98a
JA
1654 case 'h':
1655 net_mode = Net_client;
1656 strcpy(hostname, optarg);
1657 break;
1658 case 'l':
1659 net_mode = Net_server;
1660 break;
1661 case 'p':
1662 net_port = atoi(optarg);
1663 break;
32f18c48 1664 case 's':
f6fead25 1665 net_use_sendfile = 1;
32f18c48 1666 break;
d39c04ca 1667 default:
ee1f4158 1668 show_usage(argv[0]);
7425d456 1669 return 1;
d39c04ca
AB
1670 }
1671 }
1672
8e86c98a
JA
1673 setlocale(LC_NUMERIC, "en_US");
1674
1675 page_size = getpagesize();
1676
1677 if (net_mode == Net_server)
1678 return net_server();
1679
22cd0c02
JA
1680 while (optind < argc) {
1681 if (resize_devices(argv[optind++]) != 0)
1682 return 1;
1683 }
1684
e7c9f3ff 1685 if (ndevs == 0) {
ee1f4158 1686 show_usage(argv[0]);
7425d456 1687 return 1;
d39c04ca
AB
1688 }
1689
5270dddd
JA
1690 if (!relay_path)
1691 relay_path = default_relay_path;
1692
d5396421 1693 if (act_mask_tmp != 0)
d39c04ca 1694 act_mask = act_mask_tmp;
d0ca268b 1695
e3e74029
NS
1696 if (statfs(relay_path, &st) < 0) {
1697 perror("statfs");
1698 fprintf(stderr,"%s does not appear to be a valid path\n",
1699 relay_path);
1700 return 1;
64acacae 1701 } else if (st.f_type != (long) RELAYFS_TYPE) {
e3e74029 1702 fprintf(stderr,"%s does not appear to be a relay filesystem\n",
d0ca268b 1703 relay_path);
7425d456 1704 return 1;
d0ca268b
JA
1705 }
1706
e7c9f3ff 1707 if (open_devices() != 0)
7425d456 1708 return 1;
bc39777c
JA
1709
1710 if (kill_running_trace) {
e7c9f3ff 1711 stop_all_traces();
7425d456 1712 return 0;
bc39777c
JA
1713 }
1714
e7c9f3ff
NS
1715 ncpus = sysconf(_SC_NPROCESSORS_ONLN);
1716 if (ncpus < 0) {
1717 fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed\n");
7425d456 1718 return 1;
d0ca268b
JA
1719 }
1720
d0ca268b
JA
1721 signal(SIGINT, handle_sigint);
1722 signal(SIGHUP, handle_sigint);
1723 signal(SIGTERM, handle_sigint);
ece238a6 1724 signal(SIGALRM, handle_sigint);
d0ca268b 1725
8e86c98a
JA
1726 if (net_mode == Net_client && net_setup_client())
1727 return 1;
1728
1729 if (start_devices() != 0)
1730 return 1;
1731
e7c9f3ff 1732 atexit(stop_all_tracing);
830fd65c 1733
ece238a6
NS
1734 if (stop_watch)
1735 alarm(stop_watch);
1736
b7106311 1737 wait_for_threads();
d0ca268b 1738
eb3c8108
JA
1739 if (!is_trace_stopped()) {
1740 trace_stopped = 1;
91816d54
JA
1741 stop_all_threads();
1742 stop_all_traces();
91816d54 1743 }
d0ca268b 1744
eb3c8108
JA
1745 show_stats();
1746
d0ca268b
JA
1747 return 0;
1748}
1749