[PATCH] blktrace: handle case where output directory is not set
[blktrace.git] / blktrace.c
CommitLineData
d0ca268b
JA
1/*
2 * block queue tracing application
3 *
d956a2cd
JA
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *
d0ca268b
JA
20 */
21#include <pthread.h>
22#include <sys/types.h>
23#include <sys/stat.h>
24#include <unistd.h>
25#include <locale.h>
26#include <signal.h>
27#include <fcntl.h>
28#include <string.h>
29#include <sys/ioctl.h>
b9d4294e 30#include <sys/param.h>
e3e74029 31#include <sys/statfs.h>
eb3c8108 32#include <sys/poll.h>
b7106311 33#include <sys/mman.h>
8e86c98a 34#include <sys/socket.h>
d0ca268b
JA
35#include <stdio.h>
36#include <stdlib.h>
37#include <sched.h>
d39c04ca
AB
38#include <ctype.h>
39#include <getopt.h>
da39451f 40#include <errno.h>
8e86c98a
JA
41#include <netinet/in.h>
42#include <arpa/inet.h>
43#include <netdb.h>
32f18c48 44#include <sys/sendfile.h>
d0ca268b
JA
45
46#include "blktrace.h"
21f55651 47#include "barrier.h"
d0ca268b 48
/* version string of this tool */
static char blktrace_version[] = "0.99.1";

/*
 * Defaults for the relay sub-buffer size and count (see buf_size and
 * buf_nr). You may want to increase the size even more if you are
 * logging at a high rate and see skipped/missed events.
 */
#define BUF_SIZE	(512 * 1024)
#define BUF_NR		(4)

/* size of the stdio buffer installed on regular output files */
#define OFILE_BUF	(128 * 1024)

/* NOTE(review): presumably the statfs f_type magic of debugfs - confirm */
#define DEBUGFS_TYPE	0x64626720

/* short option string; a trailing ':' means the option takes an argument */
#define S_OPTS	"d:a:A:r:o:kw:Vb:n:D:lh:p:s"
/*
 * Long option table. Entries are positional in struct option order:
 * { name, has_arg, flag, val }. Every 'val' is the short option character
 * that also appears in S_OPTS, and has_arg agrees with the ':' markers
 * there. The table ends with an all-zero sentinel entry.
 */
static struct option l_opts[] = {
	{ "dev",		required_argument,	NULL,	'd' },
	{ "act-mask",		required_argument,	NULL,	'a' },
	{ "set-mask",		required_argument,	NULL,	'A' },
	{ "relay",		required_argument,	NULL,	'r' },
	{ "output",		required_argument,	NULL,	'o' },
	{ "kill",		no_argument,		NULL,	'k' },
	{ "stopwatch",		required_argument,	NULL,	'w' },
	{ "version",		no_argument,		NULL,	'V' },
	{ "buffer-size",	required_argument,	NULL,	'b' },
	{ "num-sub-buffers",	required_argument,	NULL,	'n' },
	{ "output-dir",		required_argument,	NULL,	'D' },
	{ "listen",		no_argument,		NULL,	'l' },
	{ "host",		required_argument,	NULL,	'h' },
	{ "port",		required_argument,	NULL,	'p' },
	{ "no-sendfile",	no_argument,		NULL,	's' },
	{ NULL,			0,			NULL,	0 }
};
158
/*
 * One chunk of trace data travelling from a reader thread to the writer.
 */
struct tip_subbuf {
	void *buf;		/* data; NULL for sendfile-only entries */
	unsigned int len;	/* number of valid bytes */
	unsigned int max_len;	/* capacity recorded for buf */
};
164
21f55651
JA
/*
 * FIFO_SIZE must stay a power of two: the queue/dequeue helpers wrap the
 * indices with '& (FIFO_SIZE - 1)'.
 */
#define FIFO_SIZE	(1024)	/* should be plenty big! */
#define CL_SIZE		(128)	/* cache line, any bigger? */

/*
 * Ring of sub-buffer pointers. 'tail' is advanced by subbuf_fifo_queue()
 * and 'head' by subbuf_fifo_dequeue(); each index is aligned to CL_SIZE.
 */
struct tip_subbuf_fifo {
	int tail __attribute__((aligned(CL_SIZE)));
	int head __attribute__((aligned(CL_SIZE)));
	struct tip_subbuf *q[FIFO_SIZE];
};
173
d0ca268b
JA
174struct thread_information {
175 int cpu;
176 pthread_t thread;
b9d4294e
JA
177
178 int fd;
a3e4d330 179 void *fd_buf;
b9d4294e
JA
180 char fn[MAXPATHLEN + 64];
181
007c233c
JA
182 FILE *ofile;
183 char *ofile_buffer;
32f18c48 184 off_t ofile_offset;
9db17354 185 int ofile_stdout;
8e86c98a 186 int ofile_mmap;
007c233c 187
0cc7d25e
JA
188 int (*get_subbuf)(struct thread_information *, unsigned int);
189 int (*flush_subbuf)(struct thread_information *, struct tip_subbuf *);
190 int (*read_data)(struct thread_information *, void *, unsigned int);
191
d0ca268b 192 unsigned long events_processed;
b7106311 193 unsigned long long data_read;
bcbeb60f 194 unsigned long long data_queued;
e7c9f3ff 195 struct device_information *device;
9db17354
JA
196
197 int exited;
198
b7106311
JA
199 /*
200 * piped fifo buffers
201 */
21f55651 202 struct tip_subbuf_fifo fifo;
7de86b12 203 struct tip_subbuf *leftover_ts;
b7106311
JA
204
205 /*
206 * mmap controlled output files
207 */
208 unsigned long long fs_size;
209 unsigned long long fs_max_size;
210 unsigned long fs_off;
211 void *fs_buf;
212 unsigned long fs_buf_len;
d0ca268b
JA
213};
214
e7c9f3ff
NS
/*
 * State kept for each traced block device.
 */
struct device_information {
	int fd;				/* device fd, -1 once closed */
	char *path;			/* device path as given by the user */
	char buts_name[32];		/* trace name returned by BLKTRACESETUP */
	volatile int trace_started;	/* see dip_tracing()/dip_set_tracing() */
	unsigned long drop_count;	/* dropped events read from debugfs */
	struct thread_information *threads;	/* per-cpu reader array */
	struct net_connection *nc;	/* connection used by read_data_net() */
};
d0ca268b 224
e7c9f3ff 225static int ncpus;
d0ca268b 226static struct thread_information *thread_information;
e7c9f3ff
NS
227static int ndevs;
228static struct device_information *device_information;
229
230/* command line option globals */
3d06efea 231static char *debugfs_path;
d5396421 232static char *output_name;
d1d7f15f 233static char *output_dir;
5c86134e 234static int act_mask = ~0U;
bc39777c 235static int kill_running_trace;
eb3c8108
JA
236static unsigned long buf_size = BUF_SIZE;
237static unsigned long buf_nr = BUF_NR;
b7106311 238static unsigned int page_size;
d39c04ca 239
e7c9f3ff
NS
240#define is_done() (*(volatile int *)(&done))
241static volatile int done;
242
eb3c8108
JA
243#define is_trace_stopped() (*(volatile int *)(&trace_stopped))
244static volatile int trace_stopped;
245
246#define is_stat_shown() (*(volatile int *)(&stat_shown))
247static volatile int stat_shown;
a3e4d330 248
8e86c98a
JA
249int data_is_native = -1;
250
72ca8801
NS
251static void exit_trace(int status);
252
99c1f5ab
JA
/* volatile accessors for a device's trace_started flag */
#define dip_tracing(dip)	(*(volatile int *)(&(dip)->trace_started))
#define dip_set_tracing(dip, v)	((dip)->trace_started = (v))

/*
 * Walk __e entries of the array starting at __di; __d is the cursor and
 * __i the running index.
 */
#define __for_each_dip(__d, __di, __e, __i)	\
	for (__i = 0, __d = __di; __i < __e; __i++, __d++)

/* all local devices */
#define for_each_dip(__d, __i)		\
	__for_each_dip(__d, device_information, ndevs, __i)
/* all devices attached to one network connection */
#define for_each_nc_dip(__nc, __d, __i)		\
	__for_each_dip(__d, (__nc)->device_information, (__nc)->ndevs, __i)

/* walk the first __ncpus per-cpu threads of device __d */
#define __for_each_tip(__d, __t, __ncpus, __j)	\
	for (__j = 0, __t = (__d)->threads; __j < __ncpus; __j++, __t++)
/* all per-cpu threads of device __d */
#define for_each_tip(__d, __t, __j)	\
	__for_each_tip(__d, __t, ncpus, __j)
99c1f5ab 268
8e86c98a
JA
269/*
270 * networking stuff follows. we include a magic number so we know whether
271 * to endianness convert or not
272 */
273struct blktrace_net_hdr {
274 u32 magic; /* same as trace magic */
22cd0c02 275 char buts_name[32]; /* trace name */
8e86c98a 276 u32 cpu; /* for which cpu */
22cd0c02 277 u32 max_cpus;
8e86c98a
JA
278 u32 len; /* length of following trace data */
279};
280
/* default port, see net_port */
#define TRACE_NET_PORT	(8462)

/* values of net_mode */
enum {
	Net_none = 0,
	Net_server,
	Net_client,
};

/*
 * network cmd line params
 */
static char hostname[MAXHOSTNAMELEN];
static int net_port = TRACE_NET_PORT;
static int net_mode = 0;
static int net_use_sendfile = 1;	/* client: ship data with sendfile() */
8e86c98a 296
e0a1988b
JA
/*
 * One network connection and the devices/threads that belong to it.
 */
struct net_connection {
	int in_fd;			/* socket that read_data_net() reads from */
	time_t connect_time;		/* used in the output directory name */
	struct in_addr cl_in_addr;	/* used in the output directory name */
	struct device_information *device_information;
	int ndevs;			/* entries in device_information */
	int ncpus;
	int connection_index;
};

#define NET_MAX_CONNECTIONS	(1024)
static struct net_connection net_connections[NET_MAX_CONNECTIONS];
static int net_connects;
/* socket used by write_data_net() callers and sendfile(); -1 until set */
static int net_out_fd = -1;
311
312static void handle_sigint(__attribute__((__unused__)) int sig)
313{
7035d92d
JA
314 struct device_information *dip;
315 int i;
316
317 /*
318 * stop trace so we can reap currently produced data
319 */
320 for_each_dip(dip, i) {
921b05fe
JA
321 if (dip->fd == -1)
322 continue;
7035d92d
JA
323 if (ioctl(dip->fd, BLKTRACESTOP) < 0)
324 perror("BLKTRACESTOP");
325 }
326
8e86c98a
JA
327 done = 1;
328}
329
eb3c8108
JA
330static int get_dropped_count(const char *buts_name)
331{
332 int fd;
333 char tmp[MAXPATHLEN + 64];
334
335 snprintf(tmp, sizeof(tmp), "%s/block/%s/dropped",
3d06efea 336 debugfs_path, buts_name);
eb3c8108
JA
337
338 fd = open(tmp, O_RDONLY);
339 if (fd < 0) {
340 /*
341 * this may be ok, if the kernel doesn't support dropped counts
342 */
343 if (errno == ENOENT)
344 return 0;
345
346 fprintf(stderr, "Couldn't open dropped file %s\n", tmp);
347 return -1;
348 }
349
350 if (read(fd, tmp, sizeof(tmp)) < 0) {
351 perror(tmp);
352 close(fd);
353 return -1;
354 }
355
356 close(fd);
357
358 return atoi(tmp);
359}
360
e7c9f3ff 361static int start_trace(struct device_information *dip)
d0ca268b
JA
362{
363 struct blk_user_trace_setup buts;
364
1f79c4a0 365 memset(&buts, 0, sizeof(buts));
129aa440
JA
366 buts.buf_size = buf_size;
367 buts.buf_nr = buf_nr;
d39c04ca 368 buts.act_mask = act_mask;
d0ca268b 369
ed71a31e
JA
370 if (ioctl(dip->fd, BLKTRACESETUP, &buts) < 0) {
371 perror("BLKTRACESETUP");
372 return 1;
373 }
374
375 if (ioctl(dip->fd, BLKTRACESTART) < 0) {
376 perror("BLKTRACESTART");
d0ca268b
JA
377 return 1;
378 }
379
e7c9f3ff 380 memcpy(dip->buts_name, buts.name, sizeof(dip->buts_name));
99c1f5ab 381 dip_set_tracing(dip, 1);
d0ca268b
JA
382 return 0;
383}
384
e7c9f3ff 385static void stop_trace(struct device_information *dip)
d0ca268b 386{
99c1f5ab
JA
387 if (dip_tracing(dip) || kill_running_trace) {
388 dip_set_tracing(dip, 0);
cf9208ea 389
7035d92d
JA
390 /*
391 * should be stopped, just don't complain if it isn't
392 */
393 ioctl(dip->fd, BLKTRACESTOP);
394
ed71a31e
JA
395 if (ioctl(dip->fd, BLKTRACETEARDOWN) < 0)
396 perror("BLKTRACETEARDOWN");
cf9208ea 397
e7c9f3ff 398 close(dip->fd);
cf9208ea 399 dip->fd = -1;
707b0914 400 }
d0ca268b
JA
401}
402
e7c9f3ff
NS
403static void stop_all_traces(void)
404{
405 struct device_information *dip;
406 int i;
407
eb3c8108
JA
408 for_each_dip(dip, i) {
409 dip->drop_count = get_dropped_count(dip->buts_name);
e7c9f3ff 410 stop_trace(dip);
eb3c8108 411 }
e7c9f3ff
NS
412}
413
7934e668 414static void wait_for_data(struct thread_information *tip, int events)
eb3c8108 415{
7934e668 416 struct pollfd pfd = { .fd = tip->fd, .events = events };
eb3c8108 417
9db17354 418 do {
7934e668
JA
419 if (poll(&pfd, 1, 100) < 0) {
420 perror("poll");
421 break;
422 }
423 if (pfd.revents & events)
9db17354
JA
424 break;
425 if (tip->ofile_stdout)
426 break;
427 } while (!is_done());
eb3c8108
JA
428}
429
0cc7d25e
JA
430static int read_data_file(struct thread_information *tip, void *buf,
431 unsigned int len)
d0ca268b 432{
ae9f71b3 433 int ret = 0;
bbabf03a 434
9db17354 435 do {
7934e668 436 wait_for_data(tip, POLLIN);
ae9f71b3 437
9db17354
JA
438 ret = read(tip->fd, buf, len);
439 if (!ret)
440 continue;
441 else if (ret > 0)
442 return ret;
443 else {
bbabf03a 444 if (errno != EAGAIN) {
a3e4d330
JA
445 perror(tip->fn);
446 fprintf(stderr,"Thread %d failed read of %s\n",
447 tip->cpu, tip->fn);
448 break;
449 }
9db17354 450 continue;
bbabf03a 451 }
9db17354 452 } while (!is_done());
8a43bac5 453
bbabf03a 454 return ret;
8e86c98a 455
8a43bac5
JA
456}
457
0cc7d25e
JA
458static int read_data_net(struct thread_information *tip, void *buf,
459 unsigned int len)
8e86c98a 460{
e0a1988b 461 struct net_connection *nc = tip->device->nc;
8e86c98a
JA
462 unsigned int bytes_left = len;
463 int ret = 0;
464
465 do {
e0a1988b 466 ret = recv(nc->in_fd, buf, bytes_left, MSG_WAITALL);
8e86c98a
JA
467
468 if (!ret)
469 continue;
470 else if (ret < 0) {
471 if (errno != EAGAIN) {
472 perror(tip->fn);
473 fprintf(stderr, "server: failed read\n");
474 return 0;
475 }
476 continue;
477 } else {
478 buf += ret;
479 bytes_left -= ret;
480 }
481 } while (!is_done() && bytes_left);
482
410d7c62 483 return len - bytes_left;
8e86c98a
JA
484}
485
8e86c98a
JA
486static inline struct tip_subbuf *
487subbuf_fifo_dequeue(struct thread_information *tip)
a3e4d330 488{
21f55651
JA
489 const int head = tip->fifo.head;
490 const int next = (head + 1) & (FIFO_SIZE - 1);
491
492 if (head != tip->fifo.tail) {
493 struct tip_subbuf *ts = tip->fifo.q[head];
494
495 store_barrier();
496 tip->fifo.head = next;
497 return ts;
498 }
499
500 return NULL;
9db17354 501}
eb3c8108 502
21f55651
JA
503static inline int subbuf_fifo_queue(struct thread_information *tip,
504 struct tip_subbuf *ts)
9db17354 505{
21f55651
JA
506 const int tail = tip->fifo.tail;
507 const int next = (tail + 1) & (FIFO_SIZE - 1);
508
509 if (next != tip->fifo.head) {
510 tip->fifo.q[tail] = ts;
511 store_barrier();
512 tip->fifo.tail = next;
513 return 0;
514 }
515
516 fprintf(stderr, "fifo too small!\n");
517 return 1;
a3e4d330
JA
518}
519
b7106311
JA
520/*
521 * For file output, truncate and mmap the file appropriately
522 */
8e86c98a 523static int mmap_subbuf(struct thread_information *tip, unsigned int maxlen)
b7106311
JA
524{
525 int ofd = fileno(tip->ofile);
526 int ret;
527
528 /*
529 * extend file, if we have to. use chunks of 16 subbuffers.
530 */
531 if (tip->fs_off + buf_size > tip->fs_buf_len) {
532 if (tip->fs_buf) {
5975d309 533 munlock(tip->fs_buf, tip->fs_buf_len);
b7106311
JA
534 munmap(tip->fs_buf, tip->fs_buf_len);
535 tip->fs_buf = NULL;
536 }
537
538 tip->fs_off = tip->fs_size & (page_size - 1);
539 tip->fs_buf_len = (16 * buf_size) - tip->fs_off;
540 tip->fs_max_size += tip->fs_buf_len;
541
542 if (ftruncate(ofd, tip->fs_max_size) < 0) {
543 perror("ftruncate");
544 return -1;
545 }
546
547 tip->fs_buf = mmap(NULL, tip->fs_buf_len, PROT_WRITE,
548 MAP_SHARED, ofd, tip->fs_size - tip->fs_off);
549 if (tip->fs_buf == MAP_FAILED) {
550 perror("mmap");
551 return -1;
552 }
5975d309 553 mlock(tip->fs_buf, tip->fs_buf_len);
b7106311
JA
554 }
555
7934e668 556 ret = tip->read_data(tip, tip->fs_buf + tip->fs_off, maxlen);
b7106311 557 if (ret >= 0) {
dbfbd6db 558 tip->data_read += ret;
b7106311
JA
559 tip->fs_size += ret;
560 tip->fs_off += ret;
561 return 0;
562 }
563
564 return -1;
565}
566
18eed2a7
JA
567/*
568 * Use the copy approach for pipes and network
569 */
570static int get_subbuf(struct thread_information *tip, unsigned int maxlen)
571{
572 struct tip_subbuf *ts = malloc(sizeof(*ts));
573 int ret;
574
575 ts->buf = malloc(buf_size);
576 ts->max_len = maxlen;
577
7934e668 578 ret = tip->read_data(tip, ts->buf, ts->max_len);
18eed2a7
JA
579 if (ret > 0) {
580 ts->len = ret;
dbfbd6db 581 tip->data_read += ret;
7035d92d
JA
582 if (subbuf_fifo_queue(tip, ts))
583 return -1;
18eed2a7
JA
584 }
585
586 return ret;
587}
588
32f18c48
JA
589static int get_subbuf_sendfile(struct thread_information *tip,
590 unsigned int maxlen)
591{
11eedd9b 592 struct tip_subbuf *ts;
bcbeb60f
TZ
593 struct stat sb;
594 unsigned int ready;
32f18c48 595
7934e668 596 wait_for_data(tip, POLLMSG);
18eed2a7
JA
597
598 /*
599 * hack to get last data out, we can't use sendfile for that
600 */
601 if (is_done())
602 return get_subbuf(tip, maxlen);
603
bcbeb60f
TZ
604 if (fstat(tip->fd, &sb) < 0) {
605 perror("trace stat");
606 return -1;
607 }
608 ready = sb.st_size - tip->data_queued;
7934e668
JA
609 if (!ready) {
610 usleep(1000);
bcbeb60f 611 return 0;
7934e668 612 }
bcbeb60f 613
11629347
JA
614 ts = malloc(sizeof(*ts));
615 ts->buf = NULL;
616 ts->max_len = 0;
bcbeb60f
TZ
617 ts->len = ready;
618 tip->data_queued += ready;
1be42f3d 619
11629347
JA
620 if (subbuf_fifo_queue(tip, ts))
621 return -1;
11eedd9b 622
bcbeb60f 623 return ready;
32f18c48
JA
624}
625
9db17354 626static void close_thread(struct thread_information *tip)
a3e4d330 627{
9db17354
JA
628 if (tip->fd != -1)
629 close(tip->fd);
630 if (tip->ofile)
631 fclose(tip->ofile);
632 if (tip->ofile_buffer)
633 free(tip->ofile_buffer);
634 if (tip->fd_buf)
635 free(tip->fd_buf);
1c99bc21 636
9db17354
JA
637 tip->fd = -1;
638 tip->ofile = NULL;
639 tip->ofile_buffer = NULL;
640 tip->fd_buf = NULL;
a3e4d330
JA
641}
642
8e86c98a
JA
643static void tip_ftrunc_final(struct thread_information *tip)
644{
645 /*
646 * truncate to right size and cleanup mmap
647 */
c196b5f2 648 if (tip->ofile_mmap && tip->ofile) {
8e86c98a
JA
649 int ofd = fileno(tip->ofile);
650
651 if (tip->fs_buf)
652 munmap(tip->fs_buf, tip->fs_buf_len);
653
654 ftruncate(ofd, tip->fs_size);
655 }
656}
657
9db17354 658static void *thread_main(void *arg)
a3e4d330 659{
9db17354
JA
660 struct thread_information *tip = arg;
661 pid_t pid = getpid();
662 cpu_set_t cpu_mask;
a3e4d330 663
9db17354
JA
664 CPU_ZERO(&cpu_mask);
665 CPU_SET((tip->cpu), &cpu_mask);
a3e4d330 666
9db17354
JA
667 if (sched_setaffinity(pid, sizeof(cpu_mask), &cpu_mask) == -1) {
668 perror("sched_setaffinity");
669 exit_trace(1);
670 }
a3e4d330 671
9db17354 672 snprintf(tip->fn, sizeof(tip->fn), "%s/block/%s/trace%d",
3d06efea 673 debugfs_path, tip->device->buts_name, tip->cpu);
9db17354
JA
674 tip->fd = open(tip->fn, O_RDONLY);
675 if (tip->fd < 0) {
676 perror(tip->fn);
677 fprintf(stderr,"Thread %d failed open of %s\n", tip->cpu,
678 tip->fn);
679 exit_trace(1);
a3e4d330
JA
680 }
681
b7106311 682 while (!is_done()) {
7035d92d 683 if (tip->get_subbuf(tip, buf_size) < 0)
0cc7d25e 684 break;
b7106311
JA
685 }
686
7035d92d
JA
687 /*
688 * trace is stopped, pull data until we get a short read
689 */
690 while (tip->get_subbuf(tip, buf_size) > 0)
691 ;
692
8e86c98a
JA
693 tip_ftrunc_final(tip);
694 tip->exited = 1;
695 return NULL;
696}
b7106311 697
8e86c98a
JA
/*
 * Send all 'buf_len' bytes of 'buf' over socket 'fd', calling send()
 * again after partial sends. Returns 0 when everything went out, 1 if
 * send() fails.
 */
static int write_data_net(int fd, void *buf, unsigned int buf_len)
{
	char *cursor = buf;
	unsigned int remaining;
	int sent;

	for (remaining = buf_len; remaining; remaining -= sent) {
		sent = send(fd, cursor, remaining, 0);
		if (sent < 0) {
			perror("send");
			return 1;
		}

		cursor += sent;
	}

	return 0;
}
716
32f18c48 717static int net_send_header(struct thread_information *tip, unsigned int len)
8e86c98a
JA
718{
719 struct blktrace_net_hdr hdr;
8e86c98a
JA
720
721 hdr.magic = BLK_IO_TRACE_MAGIC;
22cd0c02 722 strcpy(hdr.buts_name, tip->device->buts_name);
8e86c98a 723 hdr.cpu = tip->cpu;
22cd0c02 724 hdr.max_cpus = ncpus;
32f18c48 725 hdr.len = len;
8e86c98a 726
32f18c48
JA
727 return write_data_net(net_out_fd, &hdr, sizeof(hdr));
728}
8e86c98a 729
6a752c90
JA
730/*
731 * send header with 0 length to signal end-of-run
732 */
733static void net_client_send_close(void)
734{
7934e668 735 struct device_information *dip;
6a752c90 736 struct blktrace_net_hdr hdr;
7934e668 737 int i;
6a752c90 738
7934e668 739 for_each_dip(dip, i) {
7ab2f837
JA
740 hdr.magic = BLK_IO_TRACE_MAGIC;
741 hdr.max_cpus = ncpus;
742 hdr.len = 0;
7934e668 743 strcpy(hdr.buts_name, dip->buts_name);
7ab2f837
JA
744 hdr.cpu = get_dropped_count(dip->buts_name);
745
746 write_data_net(net_out_fd, &hdr, sizeof(hdr));
7934e668
JA
747 }
748
6a752c90
JA
749}
750
32f18c48
JA
751static int flush_subbuf_net(struct thread_information *tip,
752 struct tip_subbuf *ts)
753{
754 if (net_send_header(tip, ts->len))
7934e668 755 return -1;
22cd0c02 756 if (write_data_net(net_out_fd, ts->buf, ts->len))
7934e668 757 return -1;
8e86c98a 758
f0597a7e 759 free(ts->buf);
8e86c98a 760 free(ts);
7934e668 761 return 1;
8e86c98a
JA
762}
763
f6fead25
JA
764static int net_sendfile(struct thread_information *tip, struct tip_subbuf *ts)
765{
11629347
JA
766 int ret = sendfile(net_out_fd, tip->fd, NULL, ts->len);
767
768 if (ret < 0) {
769 perror("sendfile");
770 return 1;
771 } else if (ret < (int) ts->len) {
772 fprintf(stderr, "short sendfile send (%d of %d)\n", ret, ts->len);
773 return 1;
774 }
775
776 return 0;
777}
778
32f18c48
JA
779static int flush_subbuf_sendfile(struct thread_information *tip,
780 struct tip_subbuf *ts)
781{
7934e668 782 int ret = -1;
18eed2a7
JA
783
784 /*
785 * currently we cannot use sendfile() on the last bytes read, as they
786 * may not be a full subbuffer. get_subbuf_sendfile() falls back to
787 * the read approach for those, so use send() to ship them out
788 */
789 if (ts->buf)
790 return flush_subbuf_net(tip, ts);
11eedd9b 791
f6fead25 792 if (net_send_header(tip, ts->len))
11629347 793 goto err;
f6fead25 794 if (net_sendfile(tip, ts))
11629347 795 goto err;
32f18c48 796
f6fead25 797 tip->data_read += ts->len;
e076d33b 798 tip->ofile_offset += buf_size;
7934e668 799 ret = 1;
11629347 800err:
32f18c48 801 free(ts);
11629347 802 return ret;
32f18c48
JA
803}
804
8e86c98a
JA
805static int write_data(struct thread_information *tip, void *buf,
806 unsigned int buf_len)
8a43bac5 807{
7126171a 808 int ret;
8a43bac5 809
6480258a
JA
810 if (!buf_len)
811 return 0;
812
7126171a
JA
813 while (1) {
814 ret = fwrite(buf, buf_len, 1, tip->ofile);
007c233c 815 if (ret == 1)
8a43bac5
JA
816 break;
817
db6fe5bc
JA
818 if (ret < 0) {
819 perror("write");
820 return 1;
8a43bac5 821 }
d0ca268b
JA
822 }
823
9db17354 824 if (tip->ofile_stdout)
7126171a
JA
825 fflush(tip->ofile);
826
8a43bac5
JA
827 return 0;
828}
829
8e86c98a
JA
830static int flush_subbuf_file(struct thread_information *tip,
831 struct tip_subbuf *ts)
8a43bac5 832{
9db17354
JA
833 unsigned int offset = 0;
834 struct blk_io_trace *t;
835 int pdu_len, events = 0;
8a43bac5 836
9db17354 837 /*
7de86b12 838 * surplus from last run
9db17354 839 */
7de86b12
AB
840 if (tip->leftover_ts) {
841 struct tip_subbuf *prev_ts = tip->leftover_ts;
842
9e8b753c 843 if (prev_ts->len + ts->len > prev_ts->max_len) {
7de86b12
AB
844 prev_ts->max_len += ts->len;
845 prev_ts->buf = realloc(prev_ts->buf, prev_ts->max_len);
846 }
847
9e8b753c 848 memcpy(prev_ts->buf + prev_ts->len, ts->buf, ts->len);
7de86b12
AB
849 prev_ts->len += ts->len;
850
851 free(ts->buf);
852 free(ts);
853
854 ts = prev_ts;
855 tip->leftover_ts = NULL;
9db17354 856 }
d0ca268b 857
9db17354
JA
858 while (offset + sizeof(*t) <= ts->len) {
859 t = ts->buf + offset;
3a9d6c13 860
9cfa6c2b
AB
861 if (verify_trace(t)) {
862 write_data(tip, ts->buf, offset);
9db17354 863 return -1;
9cfa6c2b 864 }
3a9d6c13 865
9db17354 866 pdu_len = t->pdu_len;
3a9d6c13 867
9db17354 868 if (offset + sizeof(*t) + pdu_len > ts->len)
3a9d6c13 869 break;
4b5db44a 870
9db17354
JA
871 offset += sizeof(*t) + pdu_len;
872 tip->events_processed++;
b7106311 873 tip->data_read += sizeof(*t) + pdu_len;
9db17354 874 events++;
3a9d6c13
JA
875 }
876
9cfa6c2b
AB
877 if (write_data(tip, ts->buf, offset))
878 return -1;
879
3a9d6c13 880 /*
9db17354 881 * leftover bytes, save them for next time
3a9d6c13 882 */
9db17354 883 if (offset != ts->len) {
7de86b12 884 tip->leftover_ts = ts;
9e8b753c
JA
885 ts->len -= offset;
886 memmove(ts->buf, ts->buf + offset, ts->len);
7de86b12
AB
887 } else {
888 free(ts->buf);
889 free(ts);
9db17354 890 }
4b5db44a 891
9db17354 892 return events;
4b5db44a
JA
893}
894
9db17354 895static int write_tip_events(struct thread_information *tip)
d5396421 896{
21f55651 897 struct tip_subbuf *ts = subbuf_fifo_dequeue(tip);
d5396421 898
0cc7d25e
JA
899 if (ts)
900 return tip->flush_subbuf(tip, ts);
91816d54 901
9db17354 902 return 0;
91816d54
JA
903}
904
9db17354
JA
905/*
906 * scans the tips we know and writes out the subbuffers we accumulate
907 */
908static void get_and_write_events(void)
d0ca268b 909{
9db17354
JA
910 struct device_information *dip;
911 struct thread_information *tip;
27223f19 912 int i, j, events, ret, tips_running;
d0ca268b 913
9db17354
JA
914 while (!is_done()) {
915 events = 0;
d0ca268b 916
9db17354
JA
917 for_each_dip(dip, i) {
918 for_each_tip(dip, tip, j) {
919 ret = write_tip_events(tip);
920 if (ret > 0)
921 events += ret;
922 }
923 }
d0ca268b 924
9db17354 925 if (!events)
7934e668 926 usleep(100000);
d0ca268b
JA
927 }
928
a3e4d330 929 /*
9db17354 930 * reap stored events
a3e4d330 931 */
9db17354
JA
932 do {
933 events = 0;
27223f19 934 tips_running = 0;
9db17354
JA
935 for_each_dip(dip, i) {
936 for_each_tip(dip, tip, j) {
937 ret = write_tip_events(tip);
938 if (ret > 0)
939 events += ret;
27223f19 940 tips_running += !tip->exited;
9db17354 941 }
69e65a9e 942 }
9db17354 943 usleep(10);
27223f19 944 } while (events || tips_running);
d0ca268b
JA
945}
946
b7106311
JA
947static void wait_for_threads(void)
948{
949 /*
8e86c98a
JA
950 * for piped or network output, poll and fetch data for writeout.
951 * for files, we just wait around for trace threads to exit
b7106311 952 */
8e86c98a
JA
953 if ((output_name && !strcmp(output_name, "-")) ||
954 net_mode == Net_client)
b7106311
JA
955 get_and_write_events();
956 else {
957 struct device_information *dip;
958 struct thread_information *tip;
959 int i, j, tips_running;
960
961 do {
962 tips_running = 0;
7934e668 963 usleep(100000);
b7106311
JA
964
965 for_each_dip(dip, i)
966 for_each_tip(dip, tip, j)
967 tips_running += !tip->exited;
968 } while (tips_running);
969 }
6a752c90
JA
970
971 if (net_mode == Net_client)
972 net_client_send_close();
b7106311
JA
973}
974
97159c02
JA
975static int fill_ofname(struct device_information *dip,
976 struct thread_information *tip, char *dst,
e3bf54d8 977 char *buts_name)
8e86c98a 978{
e3bf54d8 979 struct stat sb;
8e86c98a
JA
980 int len = 0;
981
982 if (output_dir)
983 len = sprintf(dst, "%s/", output_dir);
dd870ef6
AB
984 else
985 len = sprintf(dst, "./");
8e86c98a 986
e3bf54d8 987 if (net_mode == Net_server) {
e0a1988b
JA
988 struct net_connection *nc = dip->nc;
989
990 len += sprintf(dst + len, "%s-", inet_ntoa(nc->cl_in_addr));
991 len += strftime(dst + len, 64, "%F-%T/", gmtime(&nc->connect_time));
e3bf54d8
JA
992 }
993
994 if (stat(dst, &sb) < 0) {
995 if (errno != ENOENT) {
996 perror("stat");
997 return 1;
998 }
999 if (mkdir(dst, 0755) < 0) {
1000 perror(dst);
1001 fprintf(stderr, "Can't make output dir\n");
1002 return 1;
1003 }
1004 }
1005
8e86c98a 1006 if (output_name)
e3bf54d8 1007 sprintf(dst + len, "%s.blktrace.%d", output_name, tip->cpu);
8e86c98a 1008 else
e3bf54d8
JA
1009 sprintf(dst + len, "%s.blktrace.%d", buts_name, tip->cpu);
1010
1011 return 0;
8e86c98a
JA
1012}
1013
0cc7d25e
JA
1014static void fill_ops(struct thread_information *tip)
1015{
1016 /*
1017 * setup ops
1018 */
32f18c48 1019 if (net_mode == Net_client) {
36808255 1020 if (net_use_sendfile) {
32f18c48
JA
1021 tip->get_subbuf = get_subbuf_sendfile;
1022 tip->flush_subbuf = flush_subbuf_sendfile;
1023 } else {
1024 tip->get_subbuf = get_subbuf;
1025 tip->flush_subbuf = flush_subbuf_net;
1026 }
1027 } else {
1028 if (tip->ofile_mmap)
1029 tip->get_subbuf = mmap_subbuf;
1030 else
1031 tip->get_subbuf = get_subbuf;
0cc7d25e 1032
0cc7d25e 1033 tip->flush_subbuf = flush_subbuf_file;
32f18c48
JA
1034 }
1035
0cc7d25e
JA
1036 if (net_mode == Net_server)
1037 tip->read_data = read_data_net;
1038 else
1039 tip->read_data = read_data_file;
1040}
1041
ddf22842
JA
1042static int tip_open_output(struct device_information *dip,
1043 struct thread_information *tip)
d0ca268b 1044{
ddf22842 1045 int pipeline = output_name && !strcmp(output_name, "-");
8e86c98a 1046 int mode, vbuf_size;
e3bf54d8 1047 char op[128];
d0ca268b 1048
ddf22842
JA
1049 if (net_mode == Net_client) {
1050 tip->ofile = NULL;
1051 tip->ofile_stdout = 0;
1052 tip->ofile_mmap = 0;
0c0b75b4 1053 goto done;
ddf22842
JA
1054 } else if (pipeline) {
1055 tip->ofile = fdopen(STDOUT_FILENO, "w");
1056 tip->ofile_stdout = 1;
1057 tip->ofile_mmap = 0;
1058 mode = _IOLBF;
1059 vbuf_size = 512;
1060 } else {
97159c02 1061 if (fill_ofname(dip, tip, op, dip->buts_name))
e3bf54d8 1062 return 1;
ddf22842
JA
1063 tip->ofile = fopen(op, "w+");
1064 tip->ofile_stdout = 0;
1065 tip->ofile_mmap = 1;
1066 mode = _IOFBF;
1067 vbuf_size = OFILE_BUF;
1068 }
d5396421 1069
0c0b75b4 1070 if (tip->ofile == NULL) {
ddf22842
JA
1071 perror(op);
1072 return 1;
1073 }
d5396421 1074
0c0b75b4
JA
1075 tip->ofile_buffer = malloc(vbuf_size);
1076 if (setvbuf(tip->ofile, tip->ofile_buffer, mode, vbuf_size)) {
1077 perror("setvbuf");
1078 close_thread(tip);
1079 return 1;
ddf22842
JA
1080 }
1081
0c0b75b4 1082done:
ddf22842
JA
1083 fill_ops(tip);
1084 return 0;
1085}
007c233c 1086
ddf22842
JA
1087static int start_threads(struct device_information *dip)
1088{
1089 struct thread_information *tip;
1090 int j;
1091
1092 for_each_tip(dip, tip, j) {
1093 tip->cpu = j;
1094 tip->device = dip;
1095 tip->events_processed = 0;
11eedd9b 1096 tip->fd = -1;
ddf22842
JA
1097 memset(&tip->fifo, 0, sizeof(tip->fifo));
1098 tip->leftover_ts = NULL;
1099
1100 if (tip_open_output(dip, tip))
1101 return 1;
0cc7d25e 1102
9db17354 1103 if (pthread_create(&tip->thread, NULL, thread_main, tip)) {
e7c9f3ff 1104 perror("pthread_create");
007c233c 1105 close_thread(tip);
e7c9f3ff 1106 return 1;
d0ca268b
JA
1107 }
1108 }
1109
e7c9f3ff 1110 return 0;
d0ca268b
JA
1111}
1112
e7c9f3ff 1113static void stop_threads(struct device_information *dip)
3aabcd89 1114{
e7c9f3ff 1115 struct thread_information *tip;
91816d54 1116 unsigned long ret;
007c233c
JA
1117 int i;
1118
9db17354 1119 for_each_tip(dip, tip, i) {
91816d54 1120 (void) pthread_join(tip->thread, (void *) &ret);
9db17354
JA
1121 close_thread(tip);
1122 }
3aabcd89
JA
1123}
1124
e7c9f3ff 1125static void stop_all_threads(void)
72ca8801 1126{
e7c9f3ff 1127 struct device_information *dip;
72ca8801
NS
1128 int i;
1129
99c1f5ab 1130 for_each_dip(dip, i)
e7c9f3ff
NS
1131 stop_threads(dip);
1132}
1133
1134static void stop_all_tracing(void)
1135{
1136 struct device_information *dip;
91816d54 1137 int i;
007c233c 1138
91816d54 1139 for_each_dip(dip, i)
e7c9f3ff 1140 stop_trace(dip);
72ca8801
NS
1141}
1142
1143static void exit_trace(int status)
1144{
eb3c8108
JA
1145 if (!is_trace_stopped()) {
1146 trace_stopped = 1;
1147 stop_all_threads();
1148 stop_all_tracing();
1149 }
1150
72ca8801
NS
1151 exit(status);
1152}
1153
e7c9f3ff
NS
1154static int resize_devices(char *path)
1155{
1156 int size = (ndevs + 1) * sizeof(struct device_information);
1157
1158 device_information = realloc(device_information, size);
1159 if (!device_information) {
1160 fprintf(stderr, "Out of memory, device %s (%d)\n", path, size);
1161 return 1;
1162 }
1163 device_information[ndevs].path = path;
1164 ndevs++;
1165 return 0;
1166}
1167
1168static int open_devices(void)
d0ca268b 1169{
e7c9f3ff 1170 struct device_information *dip;
d0ca268b 1171 int i;
d0ca268b 1172
99c1f5ab 1173 for_each_dip(dip, i) {
cf9208ea 1174 dip->fd = open(dip->path, O_RDONLY | O_NONBLOCK);
e7c9f3ff
NS
1175 if (dip->fd < 0) {
1176 perror(dip->path);
1177 return 1;
1178 }
1179 }
99c1f5ab 1180
e7c9f3ff
NS
1181 return 0;
1182}
1183
1184static int start_devices(void)
1185{
1186 struct device_information *dip;
1187 int i, j, size;
1188
1189 size = ncpus * sizeof(struct thread_information);
1190 thread_information = malloc(size * ndevs);
1191 if (!thread_information) {
1192 fprintf(stderr, "Out of memory, threads (%d)\n", size * ndevs);
1193 return 1;
1194 }
d5396421 1195
99c1f5ab 1196 for_each_dip(dip, i) {
e7c9f3ff
NS
1197 if (start_trace(dip)) {
1198 close(dip->fd);
1199 fprintf(stderr, "Failed to start trace on %s\n",
1200 dip->path);
1201 break;
1202 }
1203 }
99c1f5ab 1204
e7c9f3ff 1205 if (i != ndevs) {
ce020676 1206 __for_each_dip(dip, device_information, i, j)
e7c9f3ff 1207 stop_trace(dip);
99c1f5ab 1208
e7c9f3ff
NS
1209 return 1;
1210 }
1211
99c1f5ab 1212 for_each_dip(dip, i) {
e7c9f3ff
NS
1213 dip->threads = thread_information + (i * ncpus);
1214 if (start_threads(dip)) {
1215 fprintf(stderr, "Failed to start worker threads\n");
1216 break;
1217 }
1218 }
99c1f5ab 1219
e7c9f3ff 1220 if (i != ndevs) {
ce020676 1221 __for_each_dip(dip, device_information, i, j)
e7c9f3ff 1222 stop_threads(dip);
99c1f5ab 1223 for_each_dip(dip, i)
e7c9f3ff 1224 stop_trace(dip);
99c1f5ab 1225
e7c9f3ff 1226 return 1;
d0ca268b
JA
1227 }
1228
e7c9f3ff 1229 return 0;
d0ca268b
JA
1230}
1231
e0a1988b 1232static void show_stats(struct device_information *dips, int ndips, int cpus)
e7c9f3ff 1233{
e7c9f3ff
NS
1234 struct device_information *dip;
1235 struct thread_information *tip;
b7106311 1236 unsigned long long events_processed, data_read;
eb3c8108 1237 unsigned long total_drops;
2f903295 1238 int i, j, no_stdout = 0;
eb3c8108
JA
1239
1240 if (is_stat_shown())
1241 return;
1242
2f903295
JA
1243 if (output_name && !strcmp(output_name, "-"))
1244 no_stdout = 1;
1245
eb3c8108 1246 stat_shown = 1;
428683db 1247
56070ea4 1248 total_drops = 0;
ce020676 1249 __for_each_dip(dip, dips, ndips, i) {
2f903295 1250 if (!no_stdout)
56070ea4 1251 printf("Device: %s\n", dip->path);
e7c9f3ff 1252 events_processed = 0;
b7106311 1253 data_read = 0;
ce020676 1254 __for_each_tip(dip, tip, cpus, j) {
2f903295 1255 if (!no_stdout)
b7106311
JA
1256 printf(" CPU%3d: %20lu events, %8llu KiB data\n",
1257 tip->cpu, tip->events_processed,
54824c20 1258 (tip->data_read + 1023) >> 10);
e7c9f3ff 1259 events_processed += tip->events_processed;
b7106311 1260 data_read += tip->data_read;
e7c9f3ff 1261 }
eb3c8108 1262 total_drops += dip->drop_count;
2f903295 1263 if (!no_stdout)
b7106311
JA
1264 printf(" Total: %20llu events (dropped %lu), %8llu KiB data\n",
1265 events_processed, dip->drop_count,
18d8437d 1266 (data_read + 1023) >> 10);
e7c9f3ff 1267 }
56070ea4
JA
1268
1269 if (total_drops)
1270 fprintf(stderr, "You have dropped events, consider using a larger buffer size (-b)\n");
e7c9f3ff 1271}
52724a0e 1272
e0a1988b
JA
1273static struct device_information *net_get_dip(struct net_connection *nc,
1274 char *buts_name)
8e86c98a 1275{
22cd0c02 1276 struct device_information *dip;
8e86c98a
JA
1277 int i;
1278
e0a1988b
JA
1279 for (i = 0; i < nc->ndevs; i++) {
1280 dip = &nc->device_information[i];
8e86c98a 1281
22cd0c02
JA
1282 if (!strcmp(dip->buts_name, buts_name))
1283 return dip;
8e86c98a
JA
1284 }
1285
ce020676 1286 nc->device_information = realloc(nc->device_information, (nc->ndevs + 1) * sizeof(*dip));
e0a1988b 1287 dip = &nc->device_information[nc->ndevs];
921b05fe
JA
1288 memset(dip, 0, sizeof(*dip));
1289 dip->fd = -1;
e0a1988b 1290 dip->nc = nc;
22cd0c02 1291 strcpy(dip->buts_name, buts_name);
921b05fe 1292 dip->path = strdup(buts_name);
7ab2f837 1293 dip->trace_started = 1;
e0a1988b
JA
1294 nc->ndevs++;
1295 dip->threads = malloc(nc->ncpus * sizeof(struct thread_information));
1296 memset(dip->threads, 0, nc->ncpus * sizeof(struct thread_information));
22cd0c02
JA
1297
1298 /*
1299 * open all files
1300 */
e0a1988b 1301 for (i = 0; i < nc->ncpus; i++) {
22cd0c02 1302 struct thread_information *tip = &dip->threads[i];
8e86c98a 1303
22cd0c02 1304 tip->cpu = i;
22cd0c02 1305 tip->device = dip;
1366e53a 1306 tip->fd = -1;
8e86c98a 1307
ddf22842 1308 if (tip_open_output(dip, tip))
22cd0c02 1309 return NULL;
8e86c98a
JA
1310 }
1311
22cd0c02
JA
1312 return dip;
1313}
1314
e0a1988b
JA
1315static struct thread_information *net_get_tip(struct net_connection *nc,
1316 struct blktrace_net_hdr *bnh)
22cd0c02
JA
1317{
1318 struct device_information *dip;
1319
e0a1988b
JA
1320 nc->ncpus = bnh->max_cpus;
1321 dip = net_get_dip(nc, bnh->buts_name);
7ab2f837
JA
1322 if (!dip->trace_started) {
1323 fprintf(stderr, "Events for closed devices %s\n", dip->buts_name);
1324 return NULL;
1325 }
1326
22cd0c02 1327 return &dip->threads[bnh->cpu];
8e86c98a
JA
1328}
1329
e0a1988b
JA
1330static int net_get_header(struct net_connection *nc,
1331 struct blktrace_net_hdr *bnh)
8e86c98a 1332{
e0a1988b 1333 int fl = fcntl(nc->in_fd, F_GETFL);
8e86c98a
JA
1334 int bytes_left, ret;
1335 void *p = bnh;
1336
e0a1988b 1337 fcntl(nc->in_fd, F_SETFL, fl | O_NONBLOCK);
8e86c98a
JA
1338 bytes_left = sizeof(*bnh);
1339 while (bytes_left && !is_done()) {
e0a1988b 1340 ret = recv(nc->in_fd, p, bytes_left, MSG_WAITALL);
8e86c98a
JA
1341 if (ret < 0) {
1342 if (errno != EAGAIN) {
1343 perror("recv header");
1344 return 1;
1345 }
7934e668 1346 usleep(1000);
8e86c98a
JA
1347 continue;
1348 } else if (!ret) {
7934e668 1349 usleep(1000);
8e86c98a
JA
1350 continue;
1351 } else {
1352 p += ret;
1353 bytes_left -= ret;
1354 }
1355 }
e0a1988b 1356 fcntl(nc->in_fd, F_SETFL, fl & ~O_NONBLOCK);
227f89ff 1357 return bytes_left;
8e86c98a
JA
1358}
1359
e0a1988b
JA
1360/*
1361 * finalize a net client: truncate files, show stats, cleanup, etc
1362 */
1363static void net_client_done(struct net_connection *nc)
1364{
1365 struct device_information *dip;
1366 struct thread_information *tip;
1367 struct net_connection *last_nc;
1368 int i, j;
1369
ce020676
JA
1370 for_each_nc_dip(nc, dip, i)
1371 __for_each_tip(dip, tip, nc->ncpus, j)
e0a1988b 1372 tip_ftrunc_final(tip);
e0a1988b
JA
1373
1374 show_stats(nc->device_information, nc->ndevs, nc->ncpus);
1375
1376 /*
1377 * cleanup for next run
1378 */
1379 for_each_nc_dip(nc, dip, i) {
ce020676 1380 __for_each_tip(dip, tip, nc->ncpus, j) {
c196b5f2
JA
1381 if (tip->ofile)
1382 fclose(tip->ofile);
e0a1988b
JA
1383 }
1384
1385 free(dip->threads);
1386 free(dip->path);
1387 }
1388
1389 free(nc->device_information);
1390 nc->device_information = NULL;
1391 nc->ncpus = nc->ndevs = 0;
1392
1393 close(nc->in_fd);
1394 nc->in_fd = -1;
1395
1396 net_connects--;
1397
1398 /*
1399 * now put last entry where this one was, a little nasty since we
1400 * need to adjust dip->nc as well
1401 */
1402 if (nc->connection_index != net_connects) {
1403 last_nc = &net_connections[net_connects];
e0a1988b
JA
1404 *nc = *last_nc;
1405 for_each_nc_dip(nc, dip, i)
1406 dip->nc = nc;
1407 }
1408
1409 stat_shown = 0;
1410}
1411
1412/*
1413 * handle incoming events from a net client
1414 */
1415static int net_client_data(struct net_connection *nc)
8e86c98a
JA
1416{
1417 struct thread_information *tip;
1418 struct blktrace_net_hdr bnh;
1419
e0a1988b 1420 if (net_get_header(nc, &bnh))
8e86c98a
JA
1421 return 1;
1422
1423 if (data_is_native == -1 && check_data_endianness(bnh.magic)) {
1424 fprintf(stderr, "server: received data is bad\n");
1425 return 1;
1426 }
1427
1428 if (!data_is_native) {
227f89ff 1429 bnh.magic = be32_to_cpu(bnh.magic);
8e86c98a
JA
1430 bnh.cpu = be32_to_cpu(bnh.cpu);
1431 bnh.len = be32_to_cpu(bnh.len);
1432 }
1433
227f89ff
JA
1434 if ((bnh.magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) {
1435 fprintf(stderr, "server: bad data magic\n");
1436 return 1;
1437 }
1438
6a752c90
JA
1439 /*
1440 * len == 0 means that the other end signalled end-of-run
1441 */
1442 if (!bnh.len) {
7934e668
JA
1443 /*
1444 * overload cpu count with dropped events
1445 */
1446 struct device_information *dip;
1447
e0a1988b 1448 dip = net_get_dip(nc, bnh.buts_name);
7934e668 1449 dip->drop_count = bnh.cpu;
7ab2f837 1450 dip->trace_started = 0;
7934e668 1451
e0a1988b
JA
1452 printf("server: end of run for %s\n", dip->buts_name);
1453 net_client_done(nc);
1454 return 0;
6a752c90
JA
1455 }
1456
e0a1988b 1457 tip = net_get_tip(nc, &bnh);
8e86c98a
JA
1458 if (!tip)
1459 return 1;
1460
1461 if (mmap_subbuf(tip, bnh.len))
1462 return 1;
1463
1464 return 0;
1465}
1466
e0a1988b 1467static void net_add_connection(int listen_fd, struct sockaddr_in *addr)
659bcc3f 1468{
e0a1988b
JA
1469 socklen_t socklen = sizeof(*addr);
1470 struct net_connection *nc;
659bcc3f 1471
e0a1988b
JA
1472 if (net_connects == NET_MAX_CONNECTIONS) {
1473 fprintf(stderr, "server: no more connections allowed\n");
1474 return;
659bcc3f 1475 }
659bcc3f 1476
e0a1988b 1477 nc = &net_connections[net_connects];
ce020676 1478 memset(nc, 0, sizeof(*nc));
e0a1988b
JA
1479
1480 nc->in_fd = accept(listen_fd, (struct sockaddr *) addr, &socklen);
1481 if (nc->in_fd < 0) {
659bcc3f 1482 perror("accept");
e0a1988b 1483 return;
659bcc3f
JA
1484 }
1485
e0a1988b
JA
1486 printf("server: connection from %s\n", inet_ntoa(addr->sin_addr));
1487 time(&nc->connect_time);
1488 nc->connection_index = net_connects;
1489 nc->cl_in_addr = addr->sin_addr;
1490 net_connects++;
1491}
1492
1493/*
1494 * event driven loop, handle new incoming connections and data from
1495 * existing connections
1496 */
1497static void net_server_handle_connections(int listen_fd,
1498 struct sockaddr_in *addr)
1499{
1500 struct pollfd pfds[NET_MAX_CONNECTIONS + 1];
1501 int i, events;
1502
1503 printf("server: waiting for connections...\n");
1504
1505 while (!is_done()) {
1506 /*
1507 * the zero entry is for incoming connections, remaining
1508 * entries for clients
1509 */
1510 pfds[0].fd = listen_fd;
1511 pfds[0].events = POLLIN;
1512 for (i = 0; i < net_connects; i++) {
1513 pfds[i + 1].fd = net_connections[i].in_fd;
1514 pfds[i + 1].events = POLLIN;
1515 }
1516
1517 events = poll(pfds, 1 + net_connects, -1);
1518 if (events < 0) {
1519 if (errno == EINTR)
1520 continue;
1521
1522 perror("poll");
1523 break;
1524 } else if (!events)
1525 continue;
1526
1527 if (pfds[0].revents & POLLIN) {
1528 net_add_connection(listen_fd, addr);
1529 events--;
1530 }
1531
1532 for (i = 0; events && i < net_connects; i++) {
1533 if (pfds[i + 1].revents & POLLIN) {
1534 net_client_data(&net_connections[i]);
1535 events--;
1536 }
1537 }
1538 }
659bcc3f
JA
1539}
1540
8e86c98a
JA
1541/*
1542 * Start here when we are in server mode - just fetch data from the network
1543 * and dump to files
1544 */
1545static int net_server(void)
1546{
1547 struct sockaddr_in addr;
e0a1988b 1548 int fd, opt;
8e86c98a
JA
1549
1550 fd = socket(AF_INET, SOCK_STREAM, 0);
1551 if (fd < 0) {
1552 perror("server: socket");
1553 return 1;
1554 }
1555
1556 opt = 1;
1557 if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) < 0) {
1558 perror("setsockopt");
1559 return 1;
1560 }
1561
1562 memset(&addr, 0, sizeof(addr));
1563 addr.sin_family = AF_INET;
1564 addr.sin_addr.s_addr = htonl(INADDR_ANY);
1565 addr.sin_port = htons(net_port);
1566
1567 if (bind(fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
1568 perror("bind");
1569 return 1;
1570 }
1571
1572 if (listen(fd, 1) < 0) {
1573 perror("listen");
1574 return 1;
1575 }
1576
e0a1988b
JA
1577 net_server_handle_connections(fd, &addr);
1578 return 0;
8e86c98a
JA
1579}
1580
1581/*
1582 * Setup outgoing network connection where we will transmit data
1583 */
1584static int net_setup_client(void)
1585{
1586 struct sockaddr_in addr;
1587 int fd;
1588
1589 fd = socket(AF_INET, SOCK_STREAM, 0);
1590 if (fd < 0) {
1591 perror("client: socket");
1592 return 1;
1593 }
1594
1595 memset(&addr, 0, sizeof(addr));
1596 addr.sin_family = AF_INET;
1597 addr.sin_port = htons(net_port);
1598
1599 if (inet_aton(hostname, &addr.sin_addr) != 1) {
1600 struct hostent *hent = gethostbyname(hostname);
1601 if (!hent) {
1602 perror("gethostbyname");
1603 return 1;
1604 }
1605
1606 memcpy(&addr.sin_addr, hent->h_addr, 4);
1607 strcpy(hostname, hent->h_name);
1608 }
1609
1610 printf("blktrace: connecting to %s\n", hostname);
1611
1612 if (connect(fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
1613 perror("client: connect");
1614 return 1;
1615 }
1616
1617 printf("blktrace: connected!\n");
1618 net_out_fd = fd;
1619 return 0;
1620}
1621
52724a0e 1622static char usage_str[] = \
3d06efea 1623 "-d <dev> [ -r debugfs path ] [ -o <output> ] [-k ] [ -w time ]\n" \
52724a0e
JA
1624 "[ -a action ] [ -A action mask ] [ -v ]\n\n" \
1625 "\t-d Use specified device. May also be given last after options\n" \
3d06efea 1626 "\t-r Path to mounted debugfs, defaults to /debug\n" \
52724a0e 1627 "\t-o File(s) to send output to\n" \
d1d7f15f 1628 "\t-D Directory to prepend to output file names\n" \
52724a0e
JA
1629 "\t-k Kill a running trace\n" \
1630 "\t-w Stop after defined time, in seconds\n" \
1631 "\t-a Only trace specified actions. See documentation\n" \
1632 "\t-A Give trace mask as a single value. See documentation\n" \
129aa440
JA
1633 "\t-b Sub buffer size in KiB\n" \
1634 "\t-n Number of sub buffers\n" \
f531b94d
JA
1635 "\t-l Run in network listen mode (blktrace server)\n" \
1636 "\t-h Run in network client mode, connecting to the given host\n" \
1637 "\t-p Network port to use (default 8462)\n" \
1638 "\t-s Make the network client use sendfile() to transfer data\n" \
1639 "\t-V Print program version info\n\n";
52724a0e 1640
ee1f4158
NS
1641static void show_usage(char *program)
1642{
52724a0e 1643 fprintf(stderr, "Usage: %s %s %s",program, blktrace_version, usage_str);
ee1f4158 1644}
d0ca268b
JA
1645
1646int main(int argc, char *argv[])
1647{
3d06efea 1648 static char default_debugfs_path[] = "/debug";
e3e74029 1649 struct statfs st;
d39c04ca 1650 int i, c;
ece238a6 1651 int stop_watch = 0;
d39c04ca
AB
1652 int act_mask_tmp = 0;
1653
1654 while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) >= 0) {
1655 switch (c) {
1656 case 'a':
1657 i = find_mask_map(optarg);
1658 if (i < 0) {
ab197ca7 1659 fprintf(stderr,"Invalid action mask %s\n",
d39c04ca 1660 optarg);
7425d456 1661 return 1;
d39c04ca
AB
1662 }
1663 act_mask_tmp |= i;
1664 break;
1665
1666 case 'A':
98f8386b
AB
1667 if ((sscanf(optarg, "%x", &i) != 1) ||
1668 !valid_act_opt(i)) {
d39c04ca 1669 fprintf(stderr,
ab197ca7 1670 "Invalid set action mask %s/0x%x\n",
d39c04ca 1671 optarg, i);
7425d456 1672 return 1;
d39c04ca
AB
1673 }
1674 act_mask_tmp = i;
1675 break;
d0ca268b 1676
d39c04ca 1677 case 'd':
e7c9f3ff
NS
1678 if (resize_devices(optarg) != 0)
1679 return 1;
d39c04ca
AB
1680 break;
1681
5270dddd 1682 case 'r':
3d06efea 1683 debugfs_path = optarg;
5270dddd
JA
1684 break;
1685
d5396421 1686 case 'o':
66efebf8 1687 output_name = optarg;
d5396421 1688 break;
bc39777c
JA
1689 case 'k':
1690 kill_running_trace = 1;
1691 break;
ece238a6
NS
1692 case 'w':
1693 stop_watch = atoi(optarg);
1694 if (stop_watch <= 0) {
1695 fprintf(stderr,
1696 "Invalid stopwatch value (%d secs)\n",
1697 stop_watch);
1698 return 1;
1699 }
1700 break;
57ea8602 1701 case 'V':
52724a0e
JA
1702 printf("%s version %s\n", argv[0], blktrace_version);
1703 return 0;
129aa440 1704 case 'b':
eb3c8108 1705 buf_size = strtoul(optarg, NULL, 10);
183a0855 1706 if (buf_size <= 0 || buf_size > 16*1024) {
129aa440 1707 fprintf(stderr,
eb3c8108 1708 "Invalid buffer size (%lu)\n",buf_size);
129aa440
JA
1709 return 1;
1710 }
1711 buf_size <<= 10;
1712 break;
1713 case 'n':
eb3c8108 1714 buf_nr = strtoul(optarg, NULL, 10);
129aa440
JA
1715 if (buf_nr <= 0) {
1716 fprintf(stderr,
eb3c8108 1717 "Invalid buffer nr (%lu)\n", buf_nr);
129aa440
JA
1718 return 1;
1719 }
1720 break;
d1d7f15f
JA
1721 case 'D':
1722 output_dir = optarg;
1723 break;
8e86c98a
JA
1724 case 'h':
1725 net_mode = Net_client;
1726 strcpy(hostname, optarg);
1727 break;
1728 case 'l':
1729 net_mode = Net_server;
1730 break;
1731 case 'p':
1732 net_port = atoi(optarg);
1733 break;
32f18c48 1734 case 's':
79971f43 1735 net_use_sendfile = 0;
32f18c48 1736 break;
d39c04ca 1737 default:
ee1f4158 1738 show_usage(argv[0]);
7425d456 1739 return 1;
d39c04ca
AB
1740 }
1741 }
1742
8e86c98a
JA
1743 setlocale(LC_NUMERIC, "en_US");
1744
1745 page_size = getpagesize();
1746
1747 if (net_mode == Net_server)
1748 return net_server();
1749
22cd0c02
JA
1750 while (optind < argc) {
1751 if (resize_devices(argv[optind++]) != 0)
1752 return 1;
1753 }
1754
e7c9f3ff 1755 if (ndevs == 0) {
ee1f4158 1756 show_usage(argv[0]);
7425d456 1757 return 1;
d39c04ca
AB
1758 }
1759
d5396421 1760 if (act_mask_tmp != 0)
d39c04ca 1761 act_mask = act_mask_tmp;
d0ca268b 1762
3d06efea
JA
1763 if (!debugfs_path)
1764 debugfs_path = default_debugfs_path;
1765
1766 if (statfs(debugfs_path, &st) < 0) {
e3e74029
NS
1767 perror("statfs");
1768 fprintf(stderr,"%s does not appear to be a valid path\n",
3d06efea 1769 debugfs_path);
e3e74029 1770 return 1;
3d06efea
JA
1771 } else if (st.f_type != (long) DEBUGFS_TYPE) {
1772 fprintf(stderr,"%s does not appear to be a debug filesystem\n",
1773 debugfs_path);
7425d456 1774 return 1;
d0ca268b
JA
1775 }
1776
e7c9f3ff 1777 if (open_devices() != 0)
7425d456 1778 return 1;
bc39777c
JA
1779
1780 if (kill_running_trace) {
e7c9f3ff 1781 stop_all_traces();
7425d456 1782 return 0;
bc39777c
JA
1783 }
1784
e7c9f3ff
NS
1785 ncpus = sysconf(_SC_NPROCESSORS_ONLN);
1786 if (ncpus < 0) {
1787 fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed\n");
7425d456 1788 return 1;
d0ca268b
JA
1789 }
1790
d0ca268b
JA
1791 signal(SIGINT, handle_sigint);
1792 signal(SIGHUP, handle_sigint);
1793 signal(SIGTERM, handle_sigint);
ece238a6 1794 signal(SIGALRM, handle_sigint);
d0ca268b 1795
8e86c98a
JA
1796 if (net_mode == Net_client && net_setup_client())
1797 return 1;
1798
1799 if (start_devices() != 0)
1800 return 1;
1801
e7c9f3ff 1802 atexit(stop_all_tracing);
830fd65c 1803
ece238a6
NS
1804 if (stop_watch)
1805 alarm(stop_watch);
1806
b7106311 1807 wait_for_threads();
d0ca268b 1808
eb3c8108
JA
1809 if (!is_trace_stopped()) {
1810 trace_stopped = 1;
91816d54
JA
1811 stop_all_threads();
1812 stop_all_traces();
91816d54 1813 }
d0ca268b 1814
e0a1988b 1815 show_stats(device_information, ndevs, ncpus);
eb3c8108 1816
d0ca268b
JA
1817 return 0;
1818}
1819