[PATCH] Ignore -o (output_name) when in server mode
[blktrace.git] / blktrace.c
CommitLineData
d0ca268b
JA
1/*
2 * block queue tracing application
3 *
d956a2cd 4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
46e37c55 5 * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
d956a2cd
JA
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
d0ca268b
JA
21 */
22#include <pthread.h>
23#include <sys/types.h>
24#include <sys/stat.h>
25#include <unistd.h>
26#include <locale.h>
27#include <signal.h>
28#include <fcntl.h>
29#include <string.h>
30#include <sys/ioctl.h>
b9d4294e 31#include <sys/param.h>
e3e74029 32#include <sys/statfs.h>
eb3c8108 33#include <sys/poll.h>
b7106311 34#include <sys/mman.h>
8e86c98a 35#include <sys/socket.h>
d0ca268b
JA
36#include <stdio.h>
37#include <stdlib.h>
38#include <sched.h>
d39c04ca
AB
39#include <ctype.h>
40#include <getopt.h>
da39451f 41#include <errno.h>
8e86c98a
JA
42#include <netinet/in.h>
43#include <arpa/inet.h>
44#include <netdb.h>
32f18c48 45#include <sys/sendfile.h>
d0ca268b
JA
46
47#include "blktrace.h"
21f55651 48#include "barrier.h"
d0ca268b 49
/* Version string of this blktrace build */
static char blktrace_version[] = "0.99.1";

/*
 * Default buffer size and sub-buffer count handed to the kernel at trace
 * setup. You may want to increase BUF_SIZE even more if you are logging
 * at a high rate and see skipped/missed events.
 */
#define BUF_SIZE	(512 * 1024)
#define BUF_NR		(4)

/* size of the stdio buffer attached to regular output files */
#define OFILE_BUF	(128 * 1024)

/* NOTE(review): presumably the debugfs statfs magic - confirm at the user */
#define DEBUGFS_TYPE	0x64626720

/* short options; the matching long options are listed in l_opts[] */
#define S_OPTS	"d:a:A:r:o:kw:Vb:n:D:lh:p:s"
/*
 * Long options. Each entry maps onto the short option character given in
 * its last column; entries are { name, has_arg, flag, val }.
 */
static struct option l_opts[] = {
	{ "dev",		required_argument,	NULL, 'd' },
	{ "act-mask",		required_argument,	NULL, 'a' },
	{ "set-mask",		required_argument,	NULL, 'A' },
	{ "relay",		required_argument,	NULL, 'r' },
	{ "output",		required_argument,	NULL, 'o' },
	{ "kill",		no_argument,		NULL, 'k' },
	{ "stopwatch",		required_argument,	NULL, 'w' },
	{ "version",		no_argument,		NULL, 'V' },
	{ "buffer-size",	required_argument,	NULL, 'b' },
	{ "num-sub-buffers",	required_argument,	NULL, 'n' },
	{ "output-dir",		required_argument,	NULL, 'D' },
	{ "listen",		no_argument,		NULL, 'l' },
	{ "host",		required_argument,	NULL, 'h' },
	{ "port",		required_argument,	NULL, 'p' },
	{ "no-sendfile",	no_argument,		NULL, 's' },
	/* terminator */
	{ NULL,			0,			NULL, 0 }
};
159
/*
 * One chunk of trace data on its way from a reader thread to the writer.
 */
struct tip_subbuf {
	void *buf;		/* data storage (NULL on the sendfile path) */
	unsigned int len;	/* bytes of valid data */
	unsigned int max_len;	/* size recorded for buf */
};
165
21f55651
JA
#define FIFO_SIZE	(1024)	/* should be plenty big! */
#define CL_SIZE		(128)	/* cache line, any bigger? */

/*
 * Ring of queued sub-buffers. subbuf_fifo_queue() advances tail and
 * subbuf_fifo_dequeue() advances head; both indices wrap with a
 * (FIFO_SIZE - 1) mask and each is aligned to CL_SIZE.
 */
struct tip_subbuf_fifo {
	int tail __attribute__((aligned(CL_SIZE)));
	int head __attribute__((aligned(CL_SIZE)));
	struct tip_subbuf *q[FIFO_SIZE];
};
174
d0ca268b
JA
175struct thread_information {
176 int cpu;
177 pthread_t thread;
b9d4294e
JA
178
179 int fd;
a3e4d330 180 void *fd_buf;
b9d4294e
JA
181 char fn[MAXPATHLEN + 64];
182
007c233c
JA
183 FILE *ofile;
184 char *ofile_buffer;
32f18c48 185 off_t ofile_offset;
9db17354 186 int ofile_stdout;
8e86c98a 187 int ofile_mmap;
007c233c 188
0cc7d25e
JA
189 int (*get_subbuf)(struct thread_information *, unsigned int);
190 int (*flush_subbuf)(struct thread_information *, struct tip_subbuf *);
191 int (*read_data)(struct thread_information *, void *, unsigned int);
192
d0ca268b 193 unsigned long events_processed;
b7106311 194 unsigned long long data_read;
bcbeb60f 195 unsigned long long data_queued;
e7c9f3ff 196 struct device_information *device;
9db17354
JA
197
198 int exited;
199
b7106311
JA
200 /*
201 * piped fifo buffers
202 */
21f55651 203 struct tip_subbuf_fifo fifo;
7de86b12 204 struct tip_subbuf *leftover_ts;
b7106311
JA
205
206 /*
207 * mmap controlled output files
208 */
209 unsigned long long fs_size;
210 unsigned long long fs_max_size;
211 unsigned long fs_off;
212 void *fs_buf;
213 unsigned long fs_buf_len;
ff11d54c
TZ
214
215 struct net_connection *nc;
d0ca268b
JA
216};
217
e7c9f3ff
NS
218struct device_information {
219 int fd;
220 char *path;
221 char buts_name[32];
99c1f5ab 222 volatile int trace_started;
eb3c8108 223 unsigned long drop_count;
e7c9f3ff 224 struct thread_information *threads;
6a6d3f0f
TZ
225 unsigned long buf_size;
226 unsigned long buf_nr;
227 unsigned int page_size;
ff11d54c
TZ
228
229 struct cl_host *ch;
230 u32 cl_id;
231 time_t cl_connect_time;
e7c9f3ff 232};
d0ca268b 233
e7c9f3ff 234static int ncpus;
d0ca268b 235static struct thread_information *thread_information;
e7c9f3ff
NS
236static int ndevs;
237static struct device_information *device_information;
238
239/* command line option globals */
3d06efea 240static char *debugfs_path;
d5396421 241static char *output_name;
d1d7f15f 242static char *output_dir;
5c86134e 243static int act_mask = ~0U;
bc39777c 244static int kill_running_trace;
eb3c8108
JA
245static unsigned long buf_size = BUF_SIZE;
246static unsigned long buf_nr = BUF_NR;
b7106311 247static unsigned int page_size;
d39c04ca 248
e7c9f3ff
NS
/*
 * Global run-state flags. Each is read through its is_*() macro, which
 * forces a volatile access.
 */
static volatile int done;
#define is_done()		(*(volatile int *)(&done))

static volatile int trace_stopped;
#define is_trace_stopped()	(*(volatile int *)(&trace_stopped))

static volatile int stat_shown;
#define is_stat_shown()		(*(volatile int *)(&stat_shown))

int data_is_native = -1;

static void exit_trace(int status);
261
99c1f5ab
JA
/* read (through volatile) and write a device's trace_started flag */
#define dip_tracing(dip)	(*(volatile int *)(&(dip)->trace_started))
#define dip_set_tracing(dip, v)	((dip)->trace_started = (v))

/* walk __e devices starting at __di, __i being the running index */
#define __for_each_dip(__d, __di, __e, __i)	\
	for (__i = 0, __d = __di; __i < __e; __i++, __d++)

/* all local devices / all devices of the client host behind __nc */
#define for_each_dip(__d, __i)		\
	__for_each_dip(__d, device_information, ndevs, __i)
#define for_each_nc_dip(__nc, __d, __i)		\
	__for_each_dip(__d, (__nc)->ch->device_information, (__nc)->ch->ndevs, __i)

/* walk the first __ncpus workers of device __d */
#define __for_each_tip(__d, __t, __ncpus, __j)	\
	for (__j = 0, __t = (__d)->threads; __j < __ncpus; __j++, __t++)
#define for_each_tip(__d, __t, __j)	\
	__for_each_tip(__d, __t, ncpus, __j)

/* walk the linked list of known client hosts */
#define for_each_cl_host(__c)	\
	for (__c = cl_host_list; __c; __c = __c->list_next)
99c1f5ab 279
8e86c98a
JA
280/*
281 * networking stuff follows. we include a magic number so we know whether
282 * to endianness convert or not
283 */
284struct blktrace_net_hdr {
285 u32 magic; /* same as trace magic */
22cd0c02 286 char buts_name[32]; /* trace name */
8e86c98a 287 u32 cpu; /* for which cpu */
22cd0c02 288 u32 max_cpus;
8e86c98a 289 u32 len; /* length of following trace data */
ff11d54c 290 u32 cl_id; /* id for set of client per-cpu connections */
6a6d3f0f
TZ
291 u32 buf_size; /* client buf_size for this trace */
292 u32 buf_nr; /* client buf_nr for this trace */
293 u32 page_size; /* client page_size for this trace */
8e86c98a
JA
294};
295
#define TRACE_NET_PORT	(8462)

/* values taken by net_mode */
enum {
	Net_none = 0,
	Net_server,
	Net_client,
};

/*
 * network cmd line params
 */
static char hostname[MAXHOSTNAMELEN];
static int net_port = TRACE_NET_PORT;
static int net_mode = Net_none;
static int net_use_sendfile = 1;
8e86c98a 311
ff11d54c
TZ
/*
 * One remote client host, linked into cl_host_list.
 */
struct cl_host {
	struct cl_host *list_next;
	struct in_addr cl_in_addr;
	struct net_connection *net_connections;
	int nconn;
	struct device_information *device_information;
	int ndevs;
	int ncpus;
	int ndevs_done;
};
322
ff11d54c
TZ
/*
 * A single connection belonging to a client host.
 */
struct net_connection {
	int in_fd;		/* socket that read_data_net() receives from */
	struct pollfd pfd;
	time_t connect_time;
	struct cl_host *ch;	/* owning client host */
	int ncpus;
};
330
#define NET_MAX_CL_HOSTS	(1024)

/* client hosts currently known (see for_each_cl_host()) */
static struct cl_host *cl_host_list;
static int cl_hosts;
static int net_connects;

/* client side: one outgoing socket per cpu, indexed by tip->cpu */
static int *net_out_fd;
8e86c98a
JA
337
338static void handle_sigint(__attribute__((__unused__)) int sig)
339{
7035d92d
JA
340 struct device_information *dip;
341 int i;
342
343 /*
344 * stop trace so we can reap currently produced data
345 */
346 for_each_dip(dip, i) {
921b05fe
JA
347 if (dip->fd == -1)
348 continue;
7035d92d
JA
349 if (ioctl(dip->fd, BLKTRACESTOP) < 0)
350 perror("BLKTRACESTOP");
351 }
352
8e86c98a
JA
353 done = 1;
354}
355
eb3c8108
JA
356static int get_dropped_count(const char *buts_name)
357{
358 int fd;
359 char tmp[MAXPATHLEN + 64];
360
361 snprintf(tmp, sizeof(tmp), "%s/block/%s/dropped",
3d06efea 362 debugfs_path, buts_name);
eb3c8108
JA
363
364 fd = open(tmp, O_RDONLY);
365 if (fd < 0) {
366 /*
367 * this may be ok, if the kernel doesn't support dropped counts
368 */
369 if (errno == ENOENT)
370 return 0;
371
372 fprintf(stderr, "Couldn't open dropped file %s\n", tmp);
373 return -1;
374 }
375
376 if (read(fd, tmp, sizeof(tmp)) < 0) {
377 perror(tmp);
378 close(fd);
379 return -1;
380 }
381
382 close(fd);
383
384 return atoi(tmp);
385}
386
e7c9f3ff 387static int start_trace(struct device_information *dip)
d0ca268b
JA
388{
389 struct blk_user_trace_setup buts;
390
1f79c4a0 391 memset(&buts, 0, sizeof(buts));
6a6d3f0f
TZ
392 buts.buf_size = dip->buf_size;
393 buts.buf_nr = dip->buf_nr;
d39c04ca 394 buts.act_mask = act_mask;
d0ca268b 395
ed71a31e
JA
396 if (ioctl(dip->fd, BLKTRACESETUP, &buts) < 0) {
397 perror("BLKTRACESETUP");
398 return 1;
399 }
400
401 if (ioctl(dip->fd, BLKTRACESTART) < 0) {
402 perror("BLKTRACESTART");
d0ca268b
JA
403 return 1;
404 }
405
e7c9f3ff 406 memcpy(dip->buts_name, buts.name, sizeof(dip->buts_name));
99c1f5ab 407 dip_set_tracing(dip, 1);
d0ca268b
JA
408 return 0;
409}
410
e7c9f3ff 411static void stop_trace(struct device_information *dip)
d0ca268b 412{
99c1f5ab
JA
413 if (dip_tracing(dip) || kill_running_trace) {
414 dip_set_tracing(dip, 0);
cf9208ea 415
7035d92d
JA
416 /*
417 * should be stopped, just don't complain if it isn't
418 */
419 ioctl(dip->fd, BLKTRACESTOP);
420
ed71a31e
JA
421 if (ioctl(dip->fd, BLKTRACETEARDOWN) < 0)
422 perror("BLKTRACETEARDOWN");
cf9208ea 423
e7c9f3ff 424 close(dip->fd);
cf9208ea 425 dip->fd = -1;
707b0914 426 }
d0ca268b
JA
427}
428
e7c9f3ff
NS
429static void stop_all_traces(void)
430{
431 struct device_information *dip;
432 int i;
433
eb3c8108
JA
434 for_each_dip(dip, i) {
435 dip->drop_count = get_dropped_count(dip->buts_name);
e7c9f3ff 436 stop_trace(dip);
eb3c8108 437 }
e7c9f3ff
NS
438}
439
ff11d54c 440static void wait_for_data(struct thread_information *tip, int timeout)
eb3c8108 441{
ff11d54c 442 struct pollfd pfd = { .fd = tip->fd, .events = POLLIN };
eb3c8108 443
ff11d54c
TZ
444 while (!is_done()) {
445 if (poll(&pfd, 1, timeout) < 0) {
7934e668
JA
446 perror("poll");
447 break;
448 }
ff11d54c 449 if (pfd.revents & POLLIN)
9db17354
JA
450 break;
451 if (tip->ofile_stdout)
452 break;
ff11d54c 453 }
eb3c8108
JA
454}
455
0cc7d25e
JA
456static int read_data_file(struct thread_information *tip, void *buf,
457 unsigned int len)
d0ca268b 458{
ae9f71b3 459 int ret = 0;
bbabf03a 460
9db17354 461 do {
ff11d54c 462 wait_for_data(tip, 100);
ae9f71b3 463
9db17354
JA
464 ret = read(tip->fd, buf, len);
465 if (!ret)
466 continue;
467 else if (ret > 0)
468 return ret;
469 else {
bbabf03a 470 if (errno != EAGAIN) {
a3e4d330
JA
471 perror(tip->fn);
472 fprintf(stderr,"Thread %d failed read of %s\n",
473 tip->cpu, tip->fn);
474 break;
475 }
9db17354 476 continue;
bbabf03a 477 }
9db17354 478 } while (!is_done());
8a43bac5 479
bbabf03a 480 return ret;
8e86c98a 481
8a43bac5
JA
482}
483
0cc7d25e
JA
484static int read_data_net(struct thread_information *tip, void *buf,
485 unsigned int len)
8e86c98a 486{
ff11d54c 487 struct net_connection *nc = tip->nc;
8e86c98a
JA
488 unsigned int bytes_left = len;
489 int ret = 0;
490
491 do {
e0a1988b 492 ret = recv(nc->in_fd, buf, bytes_left, MSG_WAITALL);
8e86c98a
JA
493
494 if (!ret)
495 continue;
496 else if (ret < 0) {
497 if (errno != EAGAIN) {
498 perror(tip->fn);
499 fprintf(stderr, "server: failed read\n");
500 return 0;
501 }
502 continue;
503 } else {
504 buf += ret;
505 bytes_left -= ret;
506 }
507 } while (!is_done() && bytes_left);
508
410d7c62 509 return len - bytes_left;
8e86c98a
JA
510}
511
8e86c98a
JA
512static inline struct tip_subbuf *
513subbuf_fifo_dequeue(struct thread_information *tip)
a3e4d330 514{
21f55651
JA
515 const int head = tip->fifo.head;
516 const int next = (head + 1) & (FIFO_SIZE - 1);
517
518 if (head != tip->fifo.tail) {
519 struct tip_subbuf *ts = tip->fifo.q[head];
520
521 store_barrier();
522 tip->fifo.head = next;
523 return ts;
524 }
525
526 return NULL;
9db17354 527}
eb3c8108 528
21f55651
JA
529static inline int subbuf_fifo_queue(struct thread_information *tip,
530 struct tip_subbuf *ts)
9db17354 531{
21f55651
JA
532 const int tail = tip->fifo.tail;
533 const int next = (tail + 1) & (FIFO_SIZE - 1);
534
535 if (next != tip->fifo.head) {
536 tip->fifo.q[tail] = ts;
537 store_barrier();
538 tip->fifo.tail = next;
539 return 0;
540 }
541
542 fprintf(stderr, "fifo too small!\n");
543 return 1;
a3e4d330
JA
544}
545
b7106311
JA
546/*
547 * For file output, truncate and mmap the file appropriately
548 */
8e86c98a 549static int mmap_subbuf(struct thread_information *tip, unsigned int maxlen)
b7106311
JA
550{
551 int ofd = fileno(tip->ofile);
552 int ret;
6a6d3f0f 553 unsigned long nr;
b7106311
JA
554
555 /*
556 * extend file, if we have to. use chunks of 16 subbuffers.
557 */
6a6d3f0f 558 if (tip->fs_off + maxlen > tip->fs_buf_len) {
b7106311 559 if (tip->fs_buf) {
5975d309 560 munlock(tip->fs_buf, tip->fs_buf_len);
b7106311
JA
561 munmap(tip->fs_buf, tip->fs_buf_len);
562 tip->fs_buf = NULL;
563 }
564
6a6d3f0f
TZ
565 tip->fs_off = tip->fs_size & (tip->device->page_size - 1);
566 nr = max(16, tip->device->buf_nr);
567 tip->fs_buf_len = (nr * tip->device->buf_size) - tip->fs_off;
b7106311
JA
568 tip->fs_max_size += tip->fs_buf_len;
569
570 if (ftruncate(ofd, tip->fs_max_size) < 0) {
571 perror("ftruncate");
572 return -1;
573 }
574
575 tip->fs_buf = mmap(NULL, tip->fs_buf_len, PROT_WRITE,
576 MAP_SHARED, ofd, tip->fs_size - tip->fs_off);
577 if (tip->fs_buf == MAP_FAILED) {
578 perror("mmap");
579 return -1;
580 }
5975d309 581 mlock(tip->fs_buf, tip->fs_buf_len);
b7106311
JA
582 }
583
7934e668 584 ret = tip->read_data(tip, tip->fs_buf + tip->fs_off, maxlen);
b7106311 585 if (ret >= 0) {
dbfbd6db 586 tip->data_read += ret;
b7106311
JA
587 tip->fs_size += ret;
588 tip->fs_off += ret;
589 return 0;
590 }
591
592 return -1;
593}
594
18eed2a7
JA
595/*
596 * Use the copy approach for pipes and network
597 */
598static int get_subbuf(struct thread_information *tip, unsigned int maxlen)
599{
600 struct tip_subbuf *ts = malloc(sizeof(*ts));
601 int ret;
602
6a6d3f0f 603 ts->buf = malloc(tip->device->buf_size);
18eed2a7
JA
604 ts->max_len = maxlen;
605
7934e668 606 ret = tip->read_data(tip, ts->buf, ts->max_len);
18eed2a7
JA
607 if (ret > 0) {
608 ts->len = ret;
dbfbd6db 609 tip->data_read += ret;
7035d92d 610 if (subbuf_fifo_queue(tip, ts))
2f064793
JA
611 ret = -1;
612 }
613
614 if (ret <= 0) {
615 free(ts->buf);
616 free(ts);
18eed2a7
JA
617 }
618
619 return ret;
620}
621
9db17354 622static void close_thread(struct thread_information *tip)
a3e4d330 623{
9db17354
JA
624 if (tip->fd != -1)
625 close(tip->fd);
626 if (tip->ofile)
627 fclose(tip->ofile);
628 if (tip->ofile_buffer)
629 free(tip->ofile_buffer);
630 if (tip->fd_buf)
631 free(tip->fd_buf);
1c99bc21 632
9db17354
JA
633 tip->fd = -1;
634 tip->ofile = NULL;
635 tip->ofile_buffer = NULL;
636 tip->fd_buf = NULL;
a3e4d330
JA
637}
638
8e86c98a
JA
639static void tip_ftrunc_final(struct thread_information *tip)
640{
641 /*
642 * truncate to right size and cleanup mmap
643 */
c196b5f2 644 if (tip->ofile_mmap && tip->ofile) {
8e86c98a
JA
645 int ofd = fileno(tip->ofile);
646
647 if (tip->fs_buf)
648 munmap(tip->fs_buf, tip->fs_buf_len);
649
650 ftruncate(ofd, tip->fs_size);
651 }
652}
653
9db17354 654static void *thread_main(void *arg)
a3e4d330 655{
9db17354
JA
656 struct thread_information *tip = arg;
657 pid_t pid = getpid();
658 cpu_set_t cpu_mask;
a3e4d330 659
9db17354
JA
660 CPU_ZERO(&cpu_mask);
661 CPU_SET((tip->cpu), &cpu_mask);
a3e4d330 662
9db17354
JA
663 if (sched_setaffinity(pid, sizeof(cpu_mask), &cpu_mask) == -1) {
664 perror("sched_setaffinity");
665 exit_trace(1);
666 }
a3e4d330 667
9db17354 668 snprintf(tip->fn, sizeof(tip->fn), "%s/block/%s/trace%d",
3d06efea 669 debugfs_path, tip->device->buts_name, tip->cpu);
9db17354
JA
670 tip->fd = open(tip->fn, O_RDONLY);
671 if (tip->fd < 0) {
672 perror(tip->fn);
673 fprintf(stderr,"Thread %d failed open of %s\n", tip->cpu,
674 tip->fn);
675 exit_trace(1);
a3e4d330
JA
676 }
677
b7106311 678 while (!is_done()) {
6a6d3f0f 679 if (tip->get_subbuf(tip, tip->device->buf_size) < 0)
0cc7d25e 680 break;
b7106311
JA
681 }
682
7035d92d
JA
683 /*
684 * trace is stopped, pull data until we get a short read
685 */
6a6d3f0f 686 while (tip->get_subbuf(tip, tip->device->buf_size) > 0)
7035d92d
JA
687 ;
688
8e86c98a
JA
689 tip_ftrunc_final(tip);
690 tip->exited = 1;
691 return NULL;
692}
b7106311 693
8e86c98a
JA
/*
 * Send all @buf_len bytes of @buf on socket @fd, looping over partial
 * sends.
 *
 * Returns 0 once everything is sent, 1 if send() fails.
 */
static int write_data_net(int fd, void *buf, unsigned int buf_len)
{
	const char *cursor = buf;
	unsigned int remaining;
	int sent;

	for (remaining = buf_len; remaining; remaining -= sent) {
		sent = send(fd, cursor, remaining, 0);
		if (sent < 0) {
			perror("send");
			return 1;
		}

		cursor += sent;
	}

	return 0;
}
712
32f18c48 713static int net_send_header(struct thread_information *tip, unsigned int len)
8e86c98a
JA
714{
715 struct blktrace_net_hdr hdr;
8e86c98a
JA
716
717 hdr.magic = BLK_IO_TRACE_MAGIC;
22cd0c02 718 strcpy(hdr.buts_name, tip->device->buts_name);
8e86c98a 719 hdr.cpu = tip->cpu;
22cd0c02 720 hdr.max_cpus = ncpus;
32f18c48 721 hdr.len = len;
ff11d54c 722 hdr.cl_id = getpid();
6a6d3f0f
TZ
723 hdr.buf_size = tip->device->buf_size;
724 hdr.buf_nr = tip->device->buf_nr;
725 hdr.page_size = tip->device->page_size;
726
ff11d54c 727 return write_data_net(net_out_fd[tip->cpu], &hdr, sizeof(hdr));
32f18c48 728}
8e86c98a 729
6a752c90
JA
730/*
731 * send header with 0 length to signal end-of-run
732 */
733static void net_client_send_close(void)
734{
7934e668 735 struct device_information *dip;
6a752c90 736 struct blktrace_net_hdr hdr;
7934e668 737 int i;
6a752c90 738
7934e668 739 for_each_dip(dip, i) {
7ab2f837
JA
740 hdr.magic = BLK_IO_TRACE_MAGIC;
741 hdr.max_cpus = ncpus;
742 hdr.len = 0;
7934e668 743 strcpy(hdr.buts_name, dip->buts_name);
7ab2f837 744 hdr.cpu = get_dropped_count(dip->buts_name);
ff11d54c 745 hdr.cl_id = getpid();
6a6d3f0f
TZ
746 hdr.buf_size = dip->buf_size;
747 hdr.buf_nr = dip->buf_nr;
748 hdr.page_size = dip->page_size;
7ab2f837 749
ff11d54c 750 write_data_net(net_out_fd[0], &hdr, sizeof(hdr));
7934e668
JA
751 }
752
6a752c90
JA
753}
754
32f18c48
JA
755static int flush_subbuf_net(struct thread_information *tip,
756 struct tip_subbuf *ts)
757{
758 if (net_send_header(tip, ts->len))
7934e668 759 return -1;
ff11d54c 760 if (write_data_net(net_out_fd[tip->cpu], ts->buf, ts->len))
7934e668 761 return -1;
8e86c98a 762
f0597a7e 763 free(ts->buf);
8e86c98a 764 free(ts);
7934e668 765 return 1;
8e86c98a
JA
766}
767
f6fead25
JA
768static int net_sendfile(struct thread_information *tip, struct tip_subbuf *ts)
769{
ff11d54c 770 int ret = sendfile(net_out_fd[tip->cpu], tip->fd, NULL, ts->len);
11629347
JA
771
772 if (ret < 0) {
773 perror("sendfile");
774 return 1;
775 } else if (ret < (int) ts->len) {
776 fprintf(stderr, "short sendfile send (%d of %d)\n", ret, ts->len);
777 return 1;
778 }
779
780 return 0;
781}
782
32f18c48
JA
783static int flush_subbuf_sendfile(struct thread_information *tip,
784 struct tip_subbuf *ts)
785{
7934e668 786 int ret = -1;
18eed2a7 787
f6fead25 788 if (net_send_header(tip, ts->len))
11629347 789 goto err;
f6fead25 790 if (net_sendfile(tip, ts))
11629347 791 goto err;
32f18c48 792
f6fead25 793 tip->data_read += ts->len;
7934e668 794 ret = 1;
11629347 795err:
32f18c48 796 free(ts);
11629347 797 return ret;
32f18c48
JA
798}
799
ff11d54c 800static int get_subbuf_sendfile(struct thread_information *tip,
d042efdf 801 __attribute__((__unused__)) unsigned int maxlen)
ff11d54c
TZ
802{
803 struct tip_subbuf *ts;
804 struct stat sb;
805 unsigned int ready;
ff11d54c 806
2689be58 807 wait_for_data(tip, -1);
ff11d54c
TZ
808
809 if (fstat(tip->fd, &sb) < 0) {
810 perror("trace stat");
811 return -1;
812 }
4aeec019 813
ff11d54c
TZ
814 ready = sb.st_size - tip->data_queued;
815 if (!ready) {
816 usleep(1000);
817 return 0;
818 }
819
820 ts = malloc(sizeof(*ts));
821 ts->buf = NULL;
822 ts->max_len = 0;
823 ts->len = ready;
824 tip->data_queued += ready;
825
d042efdf
JA
826 if (flush_subbuf_sendfile(tip, ts) < 0)
827 return -1;
ff11d54c
TZ
828
829 return ready;
830}
831
8e86c98a
JA
832static int write_data(struct thread_information *tip, void *buf,
833 unsigned int buf_len)
8a43bac5 834{
7126171a 835 int ret;
8a43bac5 836
6480258a
JA
837 if (!buf_len)
838 return 0;
839
1452478f
JA
840 ret = fwrite(buf, buf_len, 1, tip->ofile);
841 if (ferror(tip->ofile) || ret != 1) {
842 perror("fwrite");
843 clearerr(tip->ofile);
844 return 1;
d0ca268b
JA
845 }
846
9db17354 847 if (tip->ofile_stdout)
7126171a
JA
848 fflush(tip->ofile);
849
8a43bac5
JA
850 return 0;
851}
852
8e86c98a
JA
853static int flush_subbuf_file(struct thread_information *tip,
854 struct tip_subbuf *ts)
8a43bac5 855{
9db17354
JA
856 unsigned int offset = 0;
857 struct blk_io_trace *t;
858 int pdu_len, events = 0;
8a43bac5 859
9db17354 860 /*
7de86b12 861 * surplus from last run
9db17354 862 */
7de86b12
AB
863 if (tip->leftover_ts) {
864 struct tip_subbuf *prev_ts = tip->leftover_ts;
865
9e8b753c 866 if (prev_ts->len + ts->len > prev_ts->max_len) {
7de86b12
AB
867 prev_ts->max_len += ts->len;
868 prev_ts->buf = realloc(prev_ts->buf, prev_ts->max_len);
869 }
870
9e8b753c 871 memcpy(prev_ts->buf + prev_ts->len, ts->buf, ts->len);
7de86b12
AB
872 prev_ts->len += ts->len;
873
874 free(ts->buf);
875 free(ts);
876
877 ts = prev_ts;
878 tip->leftover_ts = NULL;
9db17354 879 }
d0ca268b 880
9db17354
JA
881 while (offset + sizeof(*t) <= ts->len) {
882 t = ts->buf + offset;
3a9d6c13 883
9cfa6c2b
AB
884 if (verify_trace(t)) {
885 write_data(tip, ts->buf, offset);
9db17354 886 return -1;
9cfa6c2b 887 }
3a9d6c13 888
9db17354 889 pdu_len = t->pdu_len;
3a9d6c13 890
9db17354 891 if (offset + sizeof(*t) + pdu_len > ts->len)
3a9d6c13 892 break;
4b5db44a 893
9db17354
JA
894 offset += sizeof(*t) + pdu_len;
895 tip->events_processed++;
b7106311 896 tip->data_read += sizeof(*t) + pdu_len;
9db17354 897 events++;
3a9d6c13
JA
898 }
899
9cfa6c2b
AB
900 if (write_data(tip, ts->buf, offset))
901 return -1;
902
3a9d6c13 903 /*
9db17354 904 * leftover bytes, save them for next time
3a9d6c13 905 */
9db17354 906 if (offset != ts->len) {
7de86b12 907 tip->leftover_ts = ts;
9e8b753c
JA
908 ts->len -= offset;
909 memmove(ts->buf, ts->buf + offset, ts->len);
7de86b12
AB
910 } else {
911 free(ts->buf);
912 free(ts);
9db17354 913 }
4b5db44a 914
9db17354 915 return events;
4b5db44a
JA
916}
917
9db17354 918static int write_tip_events(struct thread_information *tip)
d5396421 919{
21f55651 920 struct tip_subbuf *ts = subbuf_fifo_dequeue(tip);
d5396421 921
0cc7d25e
JA
922 if (ts)
923 return tip->flush_subbuf(tip, ts);
91816d54 924
9db17354 925 return 0;
91816d54
JA
926}
927
9db17354
JA
928/*
929 * scans the tips we know and writes out the subbuffers we accumulate
930 */
931static void get_and_write_events(void)
d0ca268b 932{
9db17354
JA
933 struct device_information *dip;
934 struct thread_information *tip;
27223f19 935 int i, j, events, ret, tips_running;
d0ca268b 936
9db17354
JA
937 while (!is_done()) {
938 events = 0;
d0ca268b 939
9db17354
JA
940 for_each_dip(dip, i) {
941 for_each_tip(dip, tip, j) {
942 ret = write_tip_events(tip);
943 if (ret > 0)
944 events += ret;
945 }
946 }
d0ca268b 947
9db17354 948 if (!events)
7934e668 949 usleep(100000);
d0ca268b
JA
950 }
951
a3e4d330 952 /*
9db17354 953 * reap stored events
a3e4d330 954 */
9db17354
JA
955 do {
956 events = 0;
27223f19 957 tips_running = 0;
9db17354
JA
958 for_each_dip(dip, i) {
959 for_each_tip(dip, tip, j) {
960 ret = write_tip_events(tip);
961 if (ret > 0)
962 events += ret;
27223f19 963 tips_running += !tip->exited;
9db17354 964 }
69e65a9e 965 }
9db17354 966 usleep(10);
27223f19 967 } while (events || tips_running);
d0ca268b
JA
968}
969
b7106311
JA
970static void wait_for_threads(void)
971{
972 /*
8e86c98a
JA
973 * for piped or network output, poll and fetch data for writeout.
974 * for files, we just wait around for trace threads to exit
b7106311 975 */
8e86c98a 976 if ((output_name && !strcmp(output_name, "-")) ||
ff11d54c 977 ((net_mode == Net_client) && !net_use_sendfile))
b7106311
JA
978 get_and_write_events();
979 else {
980 struct device_information *dip;
981 struct thread_information *tip;
982 int i, j, tips_running;
983
984 do {
985 tips_running = 0;
7934e668 986 usleep(100000);
b7106311
JA
987
988 for_each_dip(dip, i)
989 for_each_tip(dip, tip, j)
990 tips_running += !tip->exited;
991 } while (tips_running);
992 }
6a752c90
JA
993
994 if (net_mode == Net_client)
995 net_client_send_close();
b7106311
JA
996}
997
97159c02
JA
998static int fill_ofname(struct device_information *dip,
999 struct thread_information *tip, char *dst,
e3bf54d8 1000 char *buts_name)
8e86c98a 1001{
e3bf54d8 1002 struct stat sb;
8e86c98a
JA
1003 int len = 0;
1004
1005 if (output_dir)
1006 len = sprintf(dst, "%s/", output_dir);
dd870ef6
AB
1007 else
1008 len = sprintf(dst, "./");
8e86c98a 1009
e3bf54d8 1010 if (net_mode == Net_server) {
ff11d54c 1011 struct net_connection *nc = tip->nc;
e0a1988b 1012
ff11d54c
TZ
1013 len += sprintf(dst + len, "%s-", inet_ntoa(nc->ch->cl_in_addr));
1014 len += strftime(dst + len, 64, "%F-%T/", gmtime(&dip->cl_connect_time));
e3bf54d8
JA
1015 }
1016
1017 if (stat(dst, &sb) < 0) {
1018 if (errno != ENOENT) {
1019 perror("stat");
1020 return 1;
1021 }
1022 if (mkdir(dst, 0755) < 0) {
1023 perror(dst);
1024 fprintf(stderr, "Can't make output dir\n");
1025 return 1;
1026 }
1027 }
1028
8e86c98a 1029 if (output_name)
e3bf54d8 1030 sprintf(dst + len, "%s.blktrace.%d", output_name, tip->cpu);
8e86c98a 1031 else
e3bf54d8
JA
1032 sprintf(dst + len, "%s.blktrace.%d", buts_name, tip->cpu);
1033
1034 return 0;
8e86c98a
JA
1035}
1036
0cc7d25e
JA
1037static void fill_ops(struct thread_information *tip)
1038{
1039 /*
1040 * setup ops
1041 */
32f18c48 1042 if (net_mode == Net_client) {
36808255 1043 if (net_use_sendfile) {
32f18c48 1044 tip->get_subbuf = get_subbuf_sendfile;
ff11d54c 1045 tip->flush_subbuf = NULL;
32f18c48
JA
1046 } else {
1047 tip->get_subbuf = get_subbuf;
1048 tip->flush_subbuf = flush_subbuf_net;
1049 }
1050 } else {
1051 if (tip->ofile_mmap)
1052 tip->get_subbuf = mmap_subbuf;
1053 else
1054 tip->get_subbuf = get_subbuf;
0cc7d25e 1055
0cc7d25e 1056 tip->flush_subbuf = flush_subbuf_file;
32f18c48
JA
1057 }
1058
0cc7d25e
JA
1059 if (net_mode == Net_server)
1060 tip->read_data = read_data_net;
1061 else
1062 tip->read_data = read_data_file;
1063}
1064
ddf22842
JA
1065static int tip_open_output(struct device_information *dip,
1066 struct thread_information *tip)
d0ca268b 1067{
ddf22842 1068 int pipeline = output_name && !strcmp(output_name, "-");
8e86c98a 1069 int mode, vbuf_size;
e3bf54d8 1070 char op[128];
d0ca268b 1071
ddf22842
JA
1072 if (net_mode == Net_client) {
1073 tip->ofile = NULL;
1074 tip->ofile_stdout = 0;
1075 tip->ofile_mmap = 0;
0c0b75b4 1076 goto done;
ddf22842
JA
1077 } else if (pipeline) {
1078 tip->ofile = fdopen(STDOUT_FILENO, "w");
1079 tip->ofile_stdout = 1;
1080 tip->ofile_mmap = 0;
1081 mode = _IOLBF;
1082 vbuf_size = 512;
1083 } else {
97159c02 1084 if (fill_ofname(dip, tip, op, dip->buts_name))
e3bf54d8 1085 return 1;
ddf22842
JA
1086 tip->ofile = fopen(op, "w+");
1087 tip->ofile_stdout = 0;
1088 tip->ofile_mmap = 1;
1089 mode = _IOFBF;
1090 vbuf_size = OFILE_BUF;
1091 }
d5396421 1092
0c0b75b4 1093 if (tip->ofile == NULL) {
ddf22842
JA
1094 perror(op);
1095 return 1;
1096 }
d5396421 1097
0c0b75b4
JA
1098 tip->ofile_buffer = malloc(vbuf_size);
1099 if (setvbuf(tip->ofile, tip->ofile_buffer, mode, vbuf_size)) {
1100 perror("setvbuf");
1101 close_thread(tip);
1102 return 1;
ddf22842
JA
1103 }
1104
0c0b75b4 1105done:
ddf22842
JA
1106 fill_ops(tip);
1107 return 0;
1108}
007c233c 1109
ddf22842
JA
1110static int start_threads(struct device_information *dip)
1111{
1112 struct thread_information *tip;
1113 int j;
1114
1115 for_each_tip(dip, tip, j) {
1116 tip->cpu = j;
1117 tip->device = dip;
1118 tip->events_processed = 0;
11eedd9b 1119 tip->fd = -1;
ddf22842
JA
1120 memset(&tip->fifo, 0, sizeof(tip->fifo));
1121 tip->leftover_ts = NULL;
1122
1123 if (tip_open_output(dip, tip))
1124 return 1;
0cc7d25e 1125
9db17354 1126 if (pthread_create(&tip->thread, NULL, thread_main, tip)) {
e7c9f3ff 1127 perror("pthread_create");
007c233c 1128 close_thread(tip);
e7c9f3ff 1129 return 1;
d0ca268b
JA
1130 }
1131 }
1132
e7c9f3ff 1133 return 0;
d0ca268b
JA
1134}
1135
e7c9f3ff 1136static void stop_threads(struct device_information *dip)
3aabcd89 1137{
e7c9f3ff 1138 struct thread_information *tip;
91816d54 1139 unsigned long ret;
007c233c
JA
1140 int i;
1141
9db17354 1142 for_each_tip(dip, tip, i) {
91816d54 1143 (void) pthread_join(tip->thread, (void *) &ret);
9db17354
JA
1144 close_thread(tip);
1145 }
3aabcd89
JA
1146}
1147
e7c9f3ff 1148static void stop_all_threads(void)
72ca8801 1149{
e7c9f3ff 1150 struct device_information *dip;
72ca8801
NS
1151 int i;
1152
99c1f5ab 1153 for_each_dip(dip, i)
e7c9f3ff
NS
1154 stop_threads(dip);
1155}
1156
1157static void stop_all_tracing(void)
1158{
1159 struct device_information *dip;
91816d54 1160 int i;
007c233c 1161
91816d54 1162 for_each_dip(dip, i)
e7c9f3ff 1163 stop_trace(dip);
72ca8801
NS
1164}
1165
1166static void exit_trace(int status)
1167{
eb3c8108
JA
1168 if (!is_trace_stopped()) {
1169 trace_stopped = 1;
1170 stop_all_threads();
1171 stop_all_tracing();
1172 }
1173
72ca8801
NS
1174 exit(status);
1175}
1176
e7c9f3ff
NS
1177static int resize_devices(char *path)
1178{
1179 int size = (ndevs + 1) * sizeof(struct device_information);
1180
1181 device_information = realloc(device_information, size);
1182 if (!device_information) {
1183 fprintf(stderr, "Out of memory, device %s (%d)\n", path, size);
1184 return 1;
1185 }
1186 device_information[ndevs].path = path;
1187 ndevs++;
1188 return 0;
1189}
1190
1191static int open_devices(void)
d0ca268b 1192{
e7c9f3ff 1193 struct device_information *dip;
d0ca268b 1194 int i;
d0ca268b 1195
99c1f5ab 1196 for_each_dip(dip, i) {
cf9208ea 1197 dip->fd = open(dip->path, O_RDONLY | O_NONBLOCK);
e7c9f3ff
NS
1198 if (dip->fd < 0) {
1199 perror(dip->path);
1200 return 1;
1201 }
6a6d3f0f
TZ
1202 dip->buf_size = buf_size;
1203 dip->buf_nr = buf_nr;
1204 dip->page_size = page_size;
e7c9f3ff 1205 }
99c1f5ab 1206
e7c9f3ff
NS
1207 return 0;
1208}
1209
1210static int start_devices(void)
1211{
1212 struct device_information *dip;
1213 int i, j, size;
1214
1215 size = ncpus * sizeof(struct thread_information);
1216 thread_information = malloc(size * ndevs);
1217 if (!thread_information) {
1218 fprintf(stderr, "Out of memory, threads (%d)\n", size * ndevs);
1219 return 1;
1220 }
d5396421 1221
99c1f5ab 1222 for_each_dip(dip, i) {
e7c9f3ff
NS
1223 if (start_trace(dip)) {
1224 close(dip->fd);
1225 fprintf(stderr, "Failed to start trace on %s\n",
1226 dip->path);
1227 break;
1228 }
1229 }
99c1f5ab 1230
e7c9f3ff 1231 if (i != ndevs) {
ce020676 1232 __for_each_dip(dip, device_information, i, j)
e7c9f3ff 1233 stop_trace(dip);
99c1f5ab 1234
e7c9f3ff
NS
1235 return 1;
1236 }
1237
99c1f5ab 1238 for_each_dip(dip, i) {
e7c9f3ff
NS
1239 dip->threads = thread_information + (i * ncpus);
1240 if (start_threads(dip)) {
1241 fprintf(stderr, "Failed to start worker threads\n");
1242 break;
1243 }
1244 }
99c1f5ab 1245
e7c9f3ff 1246 if (i != ndevs) {
ce020676 1247 __for_each_dip(dip, device_information, i, j)
e7c9f3ff 1248 stop_threads(dip);
99c1f5ab 1249 for_each_dip(dip, i)
e7c9f3ff 1250 stop_trace(dip);
99c1f5ab 1251
e7c9f3ff 1252 return 1;
d0ca268b
JA
1253 }
1254
e7c9f3ff 1255 return 0;
d0ca268b
JA
1256}
1257
e0a1988b 1258static void show_stats(struct device_information *dips, int ndips, int cpus)
e7c9f3ff 1259{
e7c9f3ff
NS
1260 struct device_information *dip;
1261 struct thread_information *tip;
b7106311 1262 unsigned long long events_processed, data_read;
eb3c8108 1263 unsigned long total_drops;
2f903295 1264 int i, j, no_stdout = 0;
eb3c8108
JA
1265
1266 if (is_stat_shown())
1267 return;
1268
2f903295
JA
1269 if (output_name && !strcmp(output_name, "-"))
1270 no_stdout = 1;
1271
eb3c8108 1272 stat_shown = 1;
428683db 1273
56070ea4 1274 total_drops = 0;
ce020676 1275 __for_each_dip(dip, dips, ndips, i) {
2f903295 1276 if (!no_stdout)
56070ea4 1277 printf("Device: %s\n", dip->path);
e7c9f3ff 1278 events_processed = 0;
b7106311 1279 data_read = 0;
ce020676 1280 __for_each_tip(dip, tip, cpus, j) {
2f903295 1281 if (!no_stdout)
b7106311
JA
1282 printf(" CPU%3d: %20lu events, %8llu KiB data\n",
1283 tip->cpu, tip->events_processed,
54824c20 1284 (tip->data_read + 1023) >> 10);
e7c9f3ff 1285 events_processed += tip->events_processed;
b7106311 1286 data_read += tip->data_read;
e7c9f3ff 1287 }
eb3c8108 1288 total_drops += dip->drop_count;
2f903295 1289 if (!no_stdout)
b7106311
JA
1290 printf(" Total: %20llu events (dropped %lu), %8llu KiB data\n",
1291 events_processed, dip->drop_count,
18d8437d 1292 (data_read + 1023) >> 10);
e7c9f3ff 1293 }
56070ea4
JA
1294
1295 if (total_drops)
1296 fprintf(stderr, "You have dropped events, consider using a larger buffer size (-b)\n");
e7c9f3ff 1297}
52724a0e 1298
e0a1988b 1299static struct device_information *net_get_dip(struct net_connection *nc,
6a6d3f0f 1300 struct blktrace_net_hdr *bnh)
8e86c98a 1301{
ff11d54c
TZ
1302 struct device_information *dip, *cl_dip = NULL;
1303 struct cl_host *ch = nc->ch;
8e86c98a
JA
1304 int i;
1305
ff11d54c
TZ
1306 for (i = 0; i < ch->ndevs; i++) {
1307 dip = &ch->device_information[i];
8e86c98a 1308
6a6d3f0f 1309 if (!strcmp(dip->buts_name, bnh->buts_name))
22cd0c02 1310 return dip;
ff11d54c 1311
6a6d3f0f 1312 if (dip->cl_id == bnh->cl_id)
ff11d54c 1313 cl_dip = dip;
8e86c98a
JA
1314 }
1315
ff11d54c
TZ
1316 ch->device_information = realloc(ch->device_information, (ch->ndevs + 1) * sizeof(*dip));
1317 dip = &ch->device_information[ch->ndevs];
921b05fe
JA
1318 memset(dip, 0, sizeof(*dip));
1319 dip->fd = -1;
ff11d54c 1320 dip->ch = ch;
6a6d3f0f
TZ
1321 dip->cl_id = bnh->cl_id;
1322 dip->buf_size = bnh->buf_size;
1323 dip->buf_nr = bnh->buf_nr;
1324 dip->page_size = bnh->page_size;
1325
ff11d54c
TZ
1326 if (cl_dip)
1327 dip->cl_connect_time = cl_dip->cl_connect_time;
1328 else
1329 dip->cl_connect_time = nc->connect_time;
6a6d3f0f
TZ
1330 strcpy(dip->buts_name, bnh->buts_name);
1331 dip->path = strdup(bnh->buts_name);
7ab2f837 1332 dip->trace_started = 1;
ff11d54c 1333 ch->ndevs++;
e0a1988b
JA
1334 dip->threads = malloc(nc->ncpus * sizeof(struct thread_information));
1335 memset(dip->threads, 0, nc->ncpus * sizeof(struct thread_information));
22cd0c02
JA
1336
1337 /*
1338 * open all files
1339 */
e0a1988b 1340 for (i = 0; i < nc->ncpus; i++) {
22cd0c02 1341 struct thread_information *tip = &dip->threads[i];
8e86c98a 1342
22cd0c02 1343 tip->cpu = i;
22cd0c02 1344 tip->device = dip;
1366e53a 1345 tip->fd = -1;
ff11d54c
TZ
1346 tip->nc = nc;
1347
ddf22842 1348 if (tip_open_output(dip, tip))
22cd0c02 1349 return NULL;
ff11d54c
TZ
1350
1351 tip->nc = NULL;
8e86c98a
JA
1352 }
1353
22cd0c02
JA
1354 return dip;
1355}
1356
e0a1988b
JA
1357static struct thread_information *net_get_tip(struct net_connection *nc,
1358 struct blktrace_net_hdr *bnh)
22cd0c02
JA
1359{
1360 struct device_information *dip;
ff11d54c 1361 struct thread_information *tip;
22cd0c02 1362
6a6d3f0f 1363 dip = net_get_dip(nc, bnh);
7ab2f837
JA
1364 if (!dip->trace_started) {
1365 fprintf(stderr, "Events for closed devices %s\n", dip->buts_name);
1366 return NULL;
1367 }
1368
ff11d54c
TZ
1369 tip = &dip->threads[bnh->cpu];
1370 if (!tip->nc)
1371 tip->nc = nc;
1372
1373 return tip;
8e86c98a
JA
1374}
1375
e0a1988b
JA
1376static int net_get_header(struct net_connection *nc,
1377 struct blktrace_net_hdr *bnh)
8e86c98a 1378{
e0a1988b 1379 int fl = fcntl(nc->in_fd, F_GETFL);
8e86c98a
JA
1380 int bytes_left, ret;
1381 void *p = bnh;
1382
e0a1988b 1383 fcntl(nc->in_fd, F_SETFL, fl | O_NONBLOCK);
8e86c98a
JA
1384 bytes_left = sizeof(*bnh);
1385 while (bytes_left && !is_done()) {
e0a1988b 1386 ret = recv(nc->in_fd, p, bytes_left, MSG_WAITALL);
8e86c98a
JA
1387 if (ret < 0) {
1388 if (errno != EAGAIN) {
1389 perror("recv header");
1390 return 1;
1391 }
7934e668 1392 usleep(1000);
8e86c98a
JA
1393 continue;
1394 } else if (!ret) {
7934e668 1395 usleep(1000);
8e86c98a
JA
1396 continue;
1397 } else {
1398 p += ret;
1399 bytes_left -= ret;
1400 }
1401 }
e0a1988b 1402 fcntl(nc->in_fd, F_SETFL, fl & ~O_NONBLOCK);
227f89ff 1403 return bytes_left;
8e86c98a
JA
1404}
1405
e0a1988b
JA
1406/*
1407 * finalize a net client: truncate files, show stats, cleanup, etc
1408 */
ff11d54c 1409static void device_done(struct net_connection *nc, struct device_information *dip)
e0a1988b 1410{
e0a1988b 1411 struct thread_information *tip;
ff11d54c 1412 int i;
e0a1988b 1413
ff11d54c
TZ
1414 __for_each_tip(dip, tip, nc->ncpus, i)
1415 tip_ftrunc_final(tip);
e0a1988b 1416
ff11d54c 1417 show_stats(dip, 1, nc->ncpus);
e0a1988b
JA
1418
1419 /*
1420 * cleanup for next run
1421 */
ff11d54c
TZ
1422 __for_each_tip(dip, tip, nc->ncpus, i) {
1423 if (tip->ofile)
1424 fclose(tip->ofile);
e0a1988b
JA
1425 }
1426
ff11d54c
TZ
1427 free(dip->threads);
1428 free(dip->path);
e0a1988b
JA
1429
1430 close(nc->in_fd);
1431 nc->in_fd = -1;
1432
ff11d54c
TZ
1433 stat_shown = 0;
1434}
e0a1988b 1435
ff11d54c
TZ
/*
 * Return 1 if the two IPv4 addresses are identical, 0 otherwise.
 */
static inline int in_addr_eq(struct in_addr a, struct in_addr b)
{
	if (a.s_addr != b.s_addr)
		return 0;

	return 1;
}
1440
1441static void net_add_client_host(struct cl_host *ch)
1442{
1443 ch->list_next = cl_host_list;
1444 cl_host_list = ch;
1445 cl_hosts++;
1446}
1447
1448static void net_remove_client_host(struct cl_host *ch)
1449{
1450 struct cl_host *p, *c;
1451
1452 for (p = c = cl_host_list; c; c = c->list_next) {
1453 if (c == ch) {
1454 if (p == c)
1455 cl_host_list = c->list_next;
1456 else
1457 p->list_next = c->list_next;
1458 cl_hosts--;
1459 return;
1460 }
1461 p = c;
e0a1988b 1462 }
ff11d54c 1463}
e0a1988b 1464
ff11d54c
TZ
1465static struct cl_host *net_find_client_host(struct in_addr cl_in_addr)
1466{
1467 struct cl_host *ch = cl_host_list;
1468
1469 while (ch) {
1470 if (in_addr_eq(ch->cl_in_addr, cl_in_addr))
1471 return ch;
1472 ch = ch->list_next;
1473 }
1474
1475 return NULL;
1476}
1477
ff11d54c
TZ
1478static void net_client_host_done(struct cl_host *ch)
1479{
1480 free(ch->device_information);
1481 free(ch->net_connections);
1482 net_connects -= ch->nconn;
1483 net_remove_client_host(ch);
1484 free(ch);
e0a1988b
JA
1485}
1486
1487/*
1488 * handle incoming events from a net client
1489 */
1490static int net_client_data(struct net_connection *nc)
8e86c98a
JA
1491{
1492 struct thread_information *tip;
1493 struct blktrace_net_hdr bnh;
1494
e0a1988b 1495 if (net_get_header(nc, &bnh))
8e86c98a
JA
1496 return 1;
1497
1498 if (data_is_native == -1 && check_data_endianness(bnh.magic)) {
1499 fprintf(stderr, "server: received data is bad\n");
1500 return 1;
1501 }
1502
1503 if (!data_is_native) {
227f89ff 1504 bnh.magic = be32_to_cpu(bnh.magic);
8e86c98a 1505 bnh.cpu = be32_to_cpu(bnh.cpu);
4aeec019 1506 bnh.max_cpus = be32_to_cpu(bnh.max_cpus);
8e86c98a 1507 bnh.len = be32_to_cpu(bnh.len);
ff11d54c 1508 bnh.cl_id = be32_to_cpu(bnh.cl_id);
6a6d3f0f
TZ
1509 bnh.buf_size = be32_to_cpu(bnh.buf_size);
1510 bnh.buf_nr = be32_to_cpu(bnh.buf_nr);
1511 bnh.page_size = be32_to_cpu(bnh.page_size);
8e86c98a
JA
1512 }
1513
227f89ff
JA
1514 if ((bnh.magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) {
1515 fprintf(stderr, "server: bad data magic\n");
1516 return 1;
1517 }
1518
4aeec019
JA
1519 if (nc->ncpus == -1)
1520 nc->ncpus = bnh.max_cpus;
1521
6a752c90
JA
1522 /*
1523 * len == 0 means that the other end signalled end-of-run
1524 */
1525 if (!bnh.len) {
7934e668
JA
1526 /*
1527 * overload cpu count with dropped events
1528 */
1529 struct device_information *dip;
1530
6a6d3f0f 1531 dip = net_get_dip(nc, &bnh);
7934e668 1532 dip->drop_count = bnh.cpu;
7ab2f837 1533 dip->trace_started = 0;
7934e668 1534
e0a1988b 1535 printf("server: end of run for %s\n", dip->buts_name);
4aeec019 1536
ff11d54c
TZ
1537 device_done(nc, dip);
1538
1539 if (++nc->ch->ndevs_done == nc->ch->ndevs)
1540 net_client_host_done(nc->ch);
4aeec019 1541
e0a1988b 1542 return 0;
6a752c90
JA
1543 }
1544
e0a1988b 1545 tip = net_get_tip(nc, &bnh);
8e86c98a
JA
1546 if (!tip)
1547 return 1;
1548
1549 if (mmap_subbuf(tip, bnh.len))
1550 return 1;
1551
1552 return 0;
1553}
1554
e0a1988b 1555static void net_add_connection(int listen_fd, struct sockaddr_in *addr)
659bcc3f 1556{
e0a1988b
JA
1557 socklen_t socklen = sizeof(*addr);
1558 struct net_connection *nc;
ff11d54c
TZ
1559 struct cl_host *ch;
1560 int in_fd;
659bcc3f 1561
ff11d54c
TZ
1562 in_fd = accept(listen_fd, (struct sockaddr *) addr, &socklen);
1563 if (in_fd < 0) {
1564 perror("accept");
e0a1988b 1565 return;
659bcc3f 1566 }
659bcc3f 1567
ff11d54c
TZ
1568 ch = net_find_client_host(addr->sin_addr);
1569 if (!ch) {
1570 if (cl_hosts == NET_MAX_CL_HOSTS) {
1571 fprintf(stderr, "server: no more clients allowed\n");
1572 return;
1573 }
1574 ch = malloc(sizeof(struct cl_host));
1575 memset(ch, 0, sizeof(*ch));
1576 ch->cl_in_addr = addr->sin_addr;
1577 net_add_client_host(ch);
506cdb6d
JA
1578
1579 printf("server: connection from %s\n", inet_ntoa(addr->sin_addr));
659bcc3f
JA
1580 }
1581
ff11d54c
TZ
1582 ch->net_connections = realloc(ch->net_connections, (ch->nconn + 1) * sizeof(*nc));
1583 nc = &ch->net_connections[ch->nconn++];
1584 memset(nc, 0, sizeof(*nc));
1585
e0a1988b 1586 time(&nc->connect_time);
ff11d54c
TZ
1587 nc->ch = ch;
1588 nc->in_fd = in_fd;
4aeec019 1589 nc->ncpus = -1;
e0a1988b
JA
1590 net_connects++;
1591}
1592
1593/*
1594 * event driven loop, handle new incoming connections and data from
1595 * existing connections
1596 */
1597static void net_server_handle_connections(int listen_fd,
1598 struct sockaddr_in *addr)
1599{
ff11d54c
TZ
1600 struct pollfd *pfds = NULL;
1601 struct net_connection **ncs = NULL;
1602 int max_connects = 0;
1603 int i, nconns, events;
1604 struct cl_host *ch;
1605 struct net_connection *nc;
1606
e0a1988b
JA
1607 printf("server: waiting for connections...\n");
1608
1609 while (!is_done()) {
ff11d54c
TZ
1610 if (net_connects >= max_connects) {
1611 pfds = realloc(pfds, (net_connects + 1) * sizeof(*pfds));
1612 ncs = realloc(ncs, (net_connects + 1) * sizeof(*ncs));
1613 max_connects = net_connects + 1;
1614 }
e0a1988b
JA
1615 /*
1616 * the zero entry is for incoming connections, remaining
1617 * entries for clients
1618 */
1619 pfds[0].fd = listen_fd;
1620 pfds[0].events = POLLIN;
ff11d54c
TZ
1621 nconns = 0;
1622 for_each_cl_host(ch) {
1623 for (i = 0; i < ch->nconn; i++) {
1624 nc = &ch->net_connections[i];
1625 pfds[nconns + 1].fd = nc->in_fd;
1626 pfds[nconns + 1].events = POLLIN;
1627 ncs[nconns++] = nc;
1628 }
e0a1988b
JA
1629 }
1630
ff11d54c 1631 events = poll(pfds, 1 + nconns, -1);
e0a1988b
JA
1632 if (events < 0) {
1633 if (errno == EINTR)
1634 continue;
1635
1636 perror("poll");
1637 break;
1638 } else if (!events)
1639 continue;
1640
1641 if (pfds[0].revents & POLLIN) {
1642 net_add_connection(listen_fd, addr);
1643 events--;
1644 }
1645
ff11d54c 1646 for (i = 0; events && i < nconns; i++) {
e0a1988b 1647 if (pfds[i + 1].revents & POLLIN) {
ff11d54c 1648 net_client_data(ncs[i]);
e0a1988b
JA
1649 events--;
1650 }
1651 }
1652 }
659bcc3f
JA
1653}
1654
8e86c98a
JA
1655/*
1656 * Start here when we are in server mode - just fetch data from the network
1657 * and dump to files
1658 */
1659static int net_server(void)
1660{
1661 struct sockaddr_in addr;
e0a1988b 1662 int fd, opt;
8e86c98a
JA
1663
1664 fd = socket(AF_INET, SOCK_STREAM, 0);
1665 if (fd < 0) {
1666 perror("server: socket");
1667 return 1;
1668 }
1669
1670 opt = 1;
1671 if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) < 0) {
1672 perror("setsockopt");
1673 return 1;
1674 }
1675
1676 memset(&addr, 0, sizeof(addr));
1677 addr.sin_family = AF_INET;
1678 addr.sin_addr.s_addr = htonl(INADDR_ANY);
1679 addr.sin_port = htons(net_port);
1680
1681 if (bind(fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
1682 perror("bind");
1683 return 1;
1684 }
1685
1686 if (listen(fd, 1) < 0) {
1687 perror("listen");
1688 return 1;
1689 }
1690
e0a1988b
JA
1691 net_server_handle_connections(fd, &addr);
1692 return 0;
8e86c98a
JA
1693}
1694
1695/*
1696 * Setup outgoing network connection where we will transmit data
1697 */
ff11d54c 1698static int net_setup_client_cpu(int i, struct sockaddr_in *addr)
8e86c98a 1699{
8e86c98a
JA
1700 int fd;
1701
1702 fd = socket(AF_INET, SOCK_STREAM, 0);
1703 if (fd < 0) {
1704 perror("client: socket");
1705 return 1;
1706 }
1707
ff11d54c
TZ
1708 if (connect(fd, (struct sockaddr *) addr, sizeof(*addr)) < 0) {
1709 perror("client: connect");
1710 return 1;
1711 }
1712
1713 net_out_fd[i] = fd;
1714 return 0;
1715}
1716
1717static int net_setup_client(void)
1718{
1719 struct sockaddr_in addr;
1720 int i;
1721
8e86c98a
JA
1722 memset(&addr, 0, sizeof(addr));
1723 addr.sin_family = AF_INET;
1724 addr.sin_port = htons(net_port);
1725
1726 if (inet_aton(hostname, &addr.sin_addr) != 1) {
1727 struct hostent *hent = gethostbyname(hostname);
1728 if (!hent) {
1729 perror("gethostbyname");
1730 return 1;
1731 }
1732
1733 memcpy(&addr.sin_addr, hent->h_addr, 4);
1734 strcpy(hostname, hent->h_name);
1735 }
1736
1737 printf("blktrace: connecting to %s\n", hostname);
1738
ff11d54c
TZ
1739 net_out_fd = malloc(ncpus * sizeof(*net_out_fd));
1740 for (i = 0; i < ncpus; i++) {
1741 if (net_setup_client_cpu(i, &addr))
1742 return 1;
8e86c98a
JA
1743 }
1744
1745 printf("blktrace: connected!\n");
ff11d54c 1746
8e86c98a
JA
1747 return 0;
1748}
1749
/*
 * Option summary printed by show_usage(). The debugfs default given for
 * -r matches the default path used in main(), and the synopsis names -V,
 * the version option that main() handles.
 */
static char usage_str[] =
	"-d <dev> [ -r debugfs path ] [ -o <output> ] [-k ] [ -w time ]\n"
	"[ -a action ] [ -A action mask ] [ -V ]\n\n"
	"\t-d Use specified device. May also be given last after options\n"
	"\t-r Path to mounted debugfs, defaults to /sys/kernel/debug\n"
	"\t-o File(s) to send output to\n"
	"\t-D Directory to prepend to output file names\n"
	"\t-k Kill a running trace\n"
	"\t-w Stop after defined time, in seconds\n"
	"\t-a Only trace specified actions. See documentation\n"
	"\t-A Give trace mask as a single value. See documentation\n"
	"\t-b Sub buffer size in KiB\n"
	"\t-n Number of sub buffers\n"
	"\t-l Run in network listen mode (blktrace server)\n"
	"\t-h Run in network client mode, connecting to the given host\n"
	"\t-p Network port to use (default 8462)\n"
	"\t-s Make the network client NOT use sendfile() to transfer data\n"
	"\t-V Print program version info\n\n";
52724a0e 1768
ee1f4158
NS
1769static void show_usage(char *program)
1770{
52724a0e 1771 fprintf(stderr, "Usage: %s %s %s",program, blktrace_version, usage_str);
ee1f4158 1772}
d0ca268b
JA
1773
1774int main(int argc, char *argv[])
1775{
c76adfc1 1776 static char default_debugfs_path[] = "/sys/kernel/debug";
e3e74029 1777 struct statfs st;
d39c04ca 1778 int i, c;
ece238a6 1779 int stop_watch = 0;
d39c04ca
AB
1780 int act_mask_tmp = 0;
1781
1782 while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) >= 0) {
1783 switch (c) {
1784 case 'a':
1785 i = find_mask_map(optarg);
1786 if (i < 0) {
ab197ca7 1787 fprintf(stderr,"Invalid action mask %s\n",
d39c04ca 1788 optarg);
7425d456 1789 return 1;
d39c04ca
AB
1790 }
1791 act_mask_tmp |= i;
1792 break;
1793
1794 case 'A':
98f8386b
AB
1795 if ((sscanf(optarg, "%x", &i) != 1) ||
1796 !valid_act_opt(i)) {
d39c04ca 1797 fprintf(stderr,
ab197ca7 1798 "Invalid set action mask %s/0x%x\n",
d39c04ca 1799 optarg, i);
7425d456 1800 return 1;
d39c04ca
AB
1801 }
1802 act_mask_tmp = i;
1803 break;
d0ca268b 1804
d39c04ca 1805 case 'd':
e7c9f3ff
NS
1806 if (resize_devices(optarg) != 0)
1807 return 1;
d39c04ca
AB
1808 break;
1809
5270dddd 1810 case 'r':
3d06efea 1811 debugfs_path = optarg;
5270dddd
JA
1812 break;
1813
d5396421 1814 case 'o':
66efebf8 1815 output_name = optarg;
d5396421 1816 break;
bc39777c
JA
1817 case 'k':
1818 kill_running_trace = 1;
1819 break;
ece238a6
NS
1820 case 'w':
1821 stop_watch = atoi(optarg);
1822 if (stop_watch <= 0) {
1823 fprintf(stderr,
1824 "Invalid stopwatch value (%d secs)\n",
1825 stop_watch);
1826 return 1;
1827 }
1828 break;
57ea8602 1829 case 'V':
52724a0e
JA
1830 printf("%s version %s\n", argv[0], blktrace_version);
1831 return 0;
129aa440 1832 case 'b':
eb3c8108 1833 buf_size = strtoul(optarg, NULL, 10);
183a0855 1834 if (buf_size <= 0 || buf_size > 16*1024) {
129aa440 1835 fprintf(stderr,
eb3c8108 1836 "Invalid buffer size (%lu)\n",buf_size);
129aa440
JA
1837 return 1;
1838 }
1839 buf_size <<= 10;
1840 break;
1841 case 'n':
eb3c8108 1842 buf_nr = strtoul(optarg, NULL, 10);
129aa440
JA
1843 if (buf_nr <= 0) {
1844 fprintf(stderr,
eb3c8108 1845 "Invalid buffer nr (%lu)\n", buf_nr);
129aa440
JA
1846 return 1;
1847 }
1848 break;
d1d7f15f
JA
1849 case 'D':
1850 output_dir = optarg;
1851 break;
8e86c98a
JA
1852 case 'h':
1853 net_mode = Net_client;
1854 strcpy(hostname, optarg);
1855 break;
1856 case 'l':
1857 net_mode = Net_server;
1858 break;
1859 case 'p':
1860 net_port = atoi(optarg);
1861 break;
32f18c48 1862 case 's':
79971f43 1863 net_use_sendfile = 0;
32f18c48 1864 break;
d39c04ca 1865 default:
ee1f4158 1866 show_usage(argv[0]);
7425d456 1867 return 1;
d39c04ca
AB
1868 }
1869 }
1870
8e86c98a
JA
1871 setlocale(LC_NUMERIC, "en_US");
1872
1873 page_size = getpagesize();
1874
ec685dd2
JA
1875 if (net_mode == Net_server) {
1876 if (output_name) {
1877 fprintf(stderr, "-o ignored in server mode\n");
1878 output_name = NULL;
1879 }
1880
8e86c98a 1881 return net_server();
ec685dd2 1882 }
8e86c98a 1883
22cd0c02
JA
1884 while (optind < argc) {
1885 if (resize_devices(argv[optind++]) != 0)
1886 return 1;
1887 }
1888
e7c9f3ff 1889 if (ndevs == 0) {
ee1f4158 1890 show_usage(argv[0]);
7425d456 1891 return 1;
d39c04ca
AB
1892 }
1893
d5396421 1894 if (act_mask_tmp != 0)
d39c04ca 1895 act_mask = act_mask_tmp;
d0ca268b 1896
3d06efea
JA
1897 if (!debugfs_path)
1898 debugfs_path = default_debugfs_path;
1899
1900 if (statfs(debugfs_path, &st) < 0) {
e3e74029
NS
1901 perror("statfs");
1902 fprintf(stderr,"%s does not appear to be a valid path\n",
3d06efea 1903 debugfs_path);
e3e74029 1904 return 1;
3d06efea
JA
1905 } else if (st.f_type != (long) DEBUGFS_TYPE) {
1906 fprintf(stderr,"%s does not appear to be a debug filesystem\n",
1907 debugfs_path);
7425d456 1908 return 1;
d0ca268b
JA
1909 }
1910
e7c9f3ff 1911 if (open_devices() != 0)
7425d456 1912 return 1;
bc39777c
JA
1913
1914 if (kill_running_trace) {
e7c9f3ff 1915 stop_all_traces();
7425d456 1916 return 0;
bc39777c
JA
1917 }
1918
e7c9f3ff
NS
1919 ncpus = sysconf(_SC_NPROCESSORS_ONLN);
1920 if (ncpus < 0) {
1921 fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed\n");
7425d456 1922 return 1;
d0ca268b
JA
1923 }
1924
d0ca268b
JA
1925 signal(SIGINT, handle_sigint);
1926 signal(SIGHUP, handle_sigint);
1927 signal(SIGTERM, handle_sigint);
ece238a6 1928 signal(SIGALRM, handle_sigint);
38e1f0c6 1929 signal(SIGPIPE, SIG_IGN);
d0ca268b 1930
8e86c98a
JA
1931 if (net_mode == Net_client && net_setup_client())
1932 return 1;
1933
1934 if (start_devices() != 0)
1935 return 1;
1936
e7c9f3ff 1937 atexit(stop_all_tracing);
830fd65c 1938
ece238a6
NS
1939 if (stop_watch)
1940 alarm(stop_watch);
1941
b7106311 1942 wait_for_threads();
d0ca268b 1943
eb3c8108
JA
1944 if (!is_trace_stopped()) {
1945 trace_stopped = 1;
91816d54
JA
1946 stop_all_threads();
1947 stop_all_traces();
91816d54 1948 }
d0ca268b 1949
e0a1988b 1950 show_stats(device_information, ndevs, ncpus);
eb3c8108 1951
d0ca268b
JA
1952 return 0;
1953}
1954