[PATCH] Ignore PC packets in btt
[blktrace.git] / blktrace.c
CommitLineData
d0ca268b
JA
1/*
2 * block queue tracing application
3 *
d956a2cd
JA
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *
d0ca268b
JA
20 */
21#include <pthread.h>
22#include <sys/types.h>
23#include <sys/stat.h>
24#include <unistd.h>
25#include <locale.h>
26#include <signal.h>
27#include <fcntl.h>
28#include <string.h>
29#include <sys/ioctl.h>
b9d4294e 30#include <sys/param.h>
e3e74029 31#include <sys/statfs.h>
eb3c8108 32#include <sys/poll.h>
b7106311 33#include <sys/mman.h>
8e86c98a 34#include <sys/socket.h>
d0ca268b
JA
35#include <stdio.h>
36#include <stdlib.h>
37#include <sched.h>
d39c04ca
AB
38#include <ctype.h>
39#include <getopt.h>
da39451f 40#include <errno.h>
8e86c98a
JA
41#include <netinet/in.h>
42#include <arpa/inet.h>
43#include <netdb.h>
32f18c48 44#include <sys/sendfile.h>
d0ca268b
JA
45
46#include "blktrace.h"
21f55651 47#include "barrier.h"
d0ca268b 48
a3225fed 49static char blktrace_version[] = "0.99.1";
52724a0e 50
8f551a39
JA
51/*
52 * You may want to increase this even more, if you are logging at a high
53 * rate and see skipped/missed events
54 */
007c233c 55#define BUF_SIZE (512 * 1024)
d0ca268b
JA
56#define BUF_NR (4)
57
007c233c
JA
58#define OFILE_BUF (128 * 1024)
59
3d06efea 60#define DEBUGFS_TYPE 0x64626720
e3e74029 61
32f18c48 62#define S_OPTS "d:a:A:r:o:kw:Vb:n:D:lh:p:s"
/*
 * Long-option table. Each entry maps a long option name to the short
 * option character that also appears in S_OPTS; the all-zero entry
 * terminates the table.
 */
static struct option l_opts[] = {
	{ "dev",		required_argument,	NULL, 'd' },
	{ "act-mask",		required_argument,	NULL, 'a' },
	{ "set-mask",		required_argument,	NULL, 'A' },
	{ "relay",		required_argument,	NULL, 'r' },
	{ "output",		required_argument,	NULL, 'o' },
	{ "kill",		no_argument,		NULL, 'k' },
	{ "stopwatch",		required_argument,	NULL, 'w' },
	{ "version",		no_argument,		NULL, 'V' },
	{ "buffer-size",	required_argument,	NULL, 'b' },
	{ "num-sub-buffers",	required_argument,	NULL, 'n' },
	{ "output-dir",		required_argument,	NULL, 'D' },
	{ "listen",		no_argument,		NULL, 'l' },
	{ "host",		required_argument,	NULL, 'h' },
	{ "port",		required_argument,	NULL, 'p' },
	{ "no-sendfile",	no_argument,		NULL, 's' },
	{ NULL,			0,			NULL, 0 }
};
158
/*
 * One chunk of trace data passed from a reader to the writeout path.
 */
struct tip_subbuf {
	void *buf;		/* data storage (NULL for sendfile chunks) */
	unsigned int len;	/* bytes of data currently held */
	unsigned int max_len;	/* capacity recorded for buf */
};
164
21f55651
JA
165#define FIFO_SIZE (1024) /* should be plenty big! */
166#define CL_SIZE (128) /* cache line, any bigger? */
167
168struct tip_subbuf_fifo {
169 int tail __attribute__((aligned(CL_SIZE)));
170 int head __attribute__((aligned(CL_SIZE)));
171 struct tip_subbuf *q[FIFO_SIZE];
172};
173
d0ca268b
JA
174struct thread_information {
175 int cpu;
176 pthread_t thread;
b9d4294e
JA
177
178 int fd;
a3e4d330 179 void *fd_buf;
b9d4294e
JA
180 char fn[MAXPATHLEN + 64];
181
007c233c
JA
182 FILE *ofile;
183 char *ofile_buffer;
32f18c48 184 off_t ofile_offset;
9db17354 185 int ofile_stdout;
8e86c98a 186 int ofile_mmap;
007c233c 187
0cc7d25e
JA
188 int (*get_subbuf)(struct thread_information *, unsigned int);
189 int (*flush_subbuf)(struct thread_information *, struct tip_subbuf *);
190 int (*read_data)(struct thread_information *, void *, unsigned int);
191
d0ca268b 192 unsigned long events_processed;
b7106311 193 unsigned long long data_read;
bcbeb60f 194 unsigned long long data_queued;
e7c9f3ff 195 struct device_information *device;
9db17354
JA
196
197 int exited;
198
b7106311
JA
199 /*
200 * piped fifo buffers
201 */
21f55651 202 struct tip_subbuf_fifo fifo;
7de86b12 203 struct tip_subbuf *leftover_ts;
b7106311
JA
204
205 /*
206 * mmap controlled output files
207 */
208 unsigned long long fs_size;
209 unsigned long long fs_max_size;
210 unsigned long fs_off;
211 void *fs_buf;
212 unsigned long fs_buf_len;
ff11d54c
TZ
213
214 struct net_connection *nc;
d0ca268b
JA
215};
216
e7c9f3ff
NS
217struct device_information {
218 int fd;
219 char *path;
220 char buts_name[32];
99c1f5ab 221 volatile int trace_started;
eb3c8108 222 unsigned long drop_count;
e7c9f3ff 223 struct thread_information *threads;
6a6d3f0f
TZ
224 unsigned long buf_size;
225 unsigned long buf_nr;
226 unsigned int page_size;
ff11d54c
TZ
227
228 struct cl_host *ch;
229 u32 cl_id;
230 time_t cl_connect_time;
e7c9f3ff 231};
d0ca268b 232
e7c9f3ff 233static int ncpus;
d0ca268b 234static struct thread_information *thread_information;
e7c9f3ff
NS
235static int ndevs;
236static struct device_information *device_information;
237
238/* command line option globals */
3d06efea 239static char *debugfs_path;
d5396421 240static char *output_name;
d1d7f15f 241static char *output_dir;
5c86134e 242static int act_mask = ~0U;
bc39777c 243static int kill_running_trace;
eb3c8108
JA
244static unsigned long buf_size = BUF_SIZE;
245static unsigned long buf_nr = BUF_NR;
b7106311 246static unsigned int page_size;
d39c04ca 247
e7c9f3ff
NS
248#define is_done() (*(volatile int *)(&done))
249static volatile int done;
250
eb3c8108
JA
251#define is_trace_stopped() (*(volatile int *)(&trace_stopped))
252static volatile int trace_stopped;
253
254#define is_stat_shown() (*(volatile int *)(&stat_shown))
255static volatile int stat_shown;
a3e4d330 256
8e86c98a
JA
257int data_is_native = -1;
258
72ca8801
NS
259static void exit_trace(int status);
260
99c1f5ab
JA
261#define dip_tracing(dip) (*(volatile int *)(&(dip)->trace_started))
262#define dip_set_tracing(dip, v) ((dip)->trace_started = (v))
263
ce020676 264#define __for_each_dip(__d, __di, __e, __i) \
e0a1988b
JA
265 for (__i = 0, __d = __di; __i < __e; __i++, __d++)
266
267#define for_each_dip(__d, __i) \
ce020676 268 __for_each_dip(__d, device_information, ndevs, __i)
e0a1988b 269#define for_each_nc_dip(__nc, __d, __i) \
ff11d54c 270 __for_each_dip(__d, (__nc)->ch->device_information, (__nc)->ch->ndevs, __i)
99c1f5ab 271
ce020676
JA
272#define __for_each_tip(__d, __t, __ncpus, __j) \
273 for (__j = 0, __t = (__d)->threads; __j < __ncpus; __j++, __t++)
9db17354 274#define for_each_tip(__d, __t, __j) \
ce020676 275 __for_each_tip(__d, __t, ncpus, __j)
ff11d54c
TZ
276#define for_each_cl_host(__c) \
277 for (__c = cl_host_list; __c; __c = __c->list_next)
99c1f5ab 278
8e86c98a
JA
279/*
280 * networking stuff follows. we include a magic number so we know whether
281 * to endianness convert or not
282 */
283struct blktrace_net_hdr {
284 u32 magic; /* same as trace magic */
22cd0c02 285 char buts_name[32]; /* trace name */
8e86c98a 286 u32 cpu; /* for which cpu */
22cd0c02 287 u32 max_cpus;
8e86c98a 288 u32 len; /* length of following trace data */
ff11d54c 289 u32 cl_id; /* id for set of client per-cpu connections */
6a6d3f0f
TZ
290 u32 buf_size; /* client buf_size for this trace */
291 u32 buf_nr; /* client buf_nr for this trace */
292 u32 page_size; /* client page_size for this trace */
8e86c98a
JA
293};
294
295#define TRACE_NET_PORT (8462)
296
297enum {
298 Net_none = 0,
299 Net_server,
300 Net_client,
301};
302
303/*
304 * network cmd line params
305 */
306static char hostname[MAXHOSTNAMELEN];
307static int net_port = TRACE_NET_PORT;
308static int net_mode = 0;
79971f43 309static int net_use_sendfile = 1;
8e86c98a 310
ff11d54c
TZ
/*
 * Server-side record of one client host; records are chained through
 * list_next starting at cl_host_list.
 */
struct cl_host {
	struct cl_host *list_next;		/* next host in the list */
	struct in_addr cl_in_addr;		/* client address */
	struct net_connection *net_connections;
	int nconn;
	struct device_information *device_information;	/* devices of this host */
	int ndevs;				/* entries in device_information */
	int ncpus;
	int ndevs_done;
};
321
ff11d54c
TZ
/*
 * One accepted client connection on the server side.
 */
struct net_connection {
	int in_fd;		/* socket that trace data is received on */
	struct pollfd pfd;
	time_t connect_time;
	struct cl_host *ch;	/* host this connection belongs to */
	int ncpus;
};
329
330#define NET_MAX_CL_HOSTS (1024)
331static struct cl_host *cl_host_list;
332static int cl_hosts;
e0a1988b 333static int net_connects;
ff11d54c
TZ
334
335static int *net_out_fd;
8e86c98a
JA
336
337static void handle_sigint(__attribute__((__unused__)) int sig)
338{
7035d92d
JA
339 struct device_information *dip;
340 int i;
341
342 /*
343 * stop trace so we can reap currently produced data
344 */
345 for_each_dip(dip, i) {
921b05fe
JA
346 if (dip->fd == -1)
347 continue;
7035d92d
JA
348 if (ioctl(dip->fd, BLKTRACESTOP) < 0)
349 perror("BLKTRACESTOP");
350 }
351
8e86c98a
JA
352 done = 1;
353}
354
eb3c8108
JA
355static int get_dropped_count(const char *buts_name)
356{
357 int fd;
358 char tmp[MAXPATHLEN + 64];
359
360 snprintf(tmp, sizeof(tmp), "%s/block/%s/dropped",
3d06efea 361 debugfs_path, buts_name);
eb3c8108
JA
362
363 fd = open(tmp, O_RDONLY);
364 if (fd < 0) {
365 /*
366 * this may be ok, if the kernel doesn't support dropped counts
367 */
368 if (errno == ENOENT)
369 return 0;
370
371 fprintf(stderr, "Couldn't open dropped file %s\n", tmp);
372 return -1;
373 }
374
375 if (read(fd, tmp, sizeof(tmp)) < 0) {
376 perror(tmp);
377 close(fd);
378 return -1;
379 }
380
381 close(fd);
382
383 return atoi(tmp);
384}
385
e7c9f3ff 386static int start_trace(struct device_information *dip)
d0ca268b
JA
387{
388 struct blk_user_trace_setup buts;
389
1f79c4a0 390 memset(&buts, 0, sizeof(buts));
6a6d3f0f
TZ
391 buts.buf_size = dip->buf_size;
392 buts.buf_nr = dip->buf_nr;
d39c04ca 393 buts.act_mask = act_mask;
d0ca268b 394
ed71a31e
JA
395 if (ioctl(dip->fd, BLKTRACESETUP, &buts) < 0) {
396 perror("BLKTRACESETUP");
397 return 1;
398 }
399
400 if (ioctl(dip->fd, BLKTRACESTART) < 0) {
401 perror("BLKTRACESTART");
d0ca268b
JA
402 return 1;
403 }
404
e7c9f3ff 405 memcpy(dip->buts_name, buts.name, sizeof(dip->buts_name));
99c1f5ab 406 dip_set_tracing(dip, 1);
d0ca268b
JA
407 return 0;
408}
409
e7c9f3ff 410static void stop_trace(struct device_information *dip)
d0ca268b 411{
99c1f5ab
JA
412 if (dip_tracing(dip) || kill_running_trace) {
413 dip_set_tracing(dip, 0);
cf9208ea 414
7035d92d
JA
415 /*
416 * should be stopped, just don't complain if it isn't
417 */
418 ioctl(dip->fd, BLKTRACESTOP);
419
ed71a31e
JA
420 if (ioctl(dip->fd, BLKTRACETEARDOWN) < 0)
421 perror("BLKTRACETEARDOWN");
cf9208ea 422
e7c9f3ff 423 close(dip->fd);
cf9208ea 424 dip->fd = -1;
707b0914 425 }
d0ca268b
JA
426}
427
e7c9f3ff
NS
428static void stop_all_traces(void)
429{
430 struct device_information *dip;
431 int i;
432
eb3c8108
JA
433 for_each_dip(dip, i) {
434 dip->drop_count = get_dropped_count(dip->buts_name);
e7c9f3ff 435 stop_trace(dip);
eb3c8108 436 }
e7c9f3ff
NS
437}
438
ff11d54c 439static void wait_for_data(struct thread_information *tip, int timeout)
eb3c8108 440{
ff11d54c 441 struct pollfd pfd = { .fd = tip->fd, .events = POLLIN };
eb3c8108 442
ff11d54c
TZ
443 while (!is_done()) {
444 if (poll(&pfd, 1, timeout) < 0) {
7934e668
JA
445 perror("poll");
446 break;
447 }
ff11d54c 448 if (pfd.revents & POLLIN)
9db17354
JA
449 break;
450 if (tip->ofile_stdout)
451 break;
ff11d54c 452 }
eb3c8108
JA
453}
454
0cc7d25e
JA
455static int read_data_file(struct thread_information *tip, void *buf,
456 unsigned int len)
d0ca268b 457{
ae9f71b3 458 int ret = 0;
bbabf03a 459
9db17354 460 do {
ff11d54c 461 wait_for_data(tip, 100);
ae9f71b3 462
9db17354
JA
463 ret = read(tip->fd, buf, len);
464 if (!ret)
465 continue;
466 else if (ret > 0)
467 return ret;
468 else {
bbabf03a 469 if (errno != EAGAIN) {
a3e4d330
JA
470 perror(tip->fn);
471 fprintf(stderr,"Thread %d failed read of %s\n",
472 tip->cpu, tip->fn);
473 break;
474 }
9db17354 475 continue;
bbabf03a 476 }
9db17354 477 } while (!is_done());
8a43bac5 478
bbabf03a 479 return ret;
8e86c98a 480
8a43bac5
JA
481}
482
0cc7d25e
JA
483static int read_data_net(struct thread_information *tip, void *buf,
484 unsigned int len)
8e86c98a 485{
ff11d54c 486 struct net_connection *nc = tip->nc;
8e86c98a
JA
487 unsigned int bytes_left = len;
488 int ret = 0;
489
490 do {
e0a1988b 491 ret = recv(nc->in_fd, buf, bytes_left, MSG_WAITALL);
8e86c98a
JA
492
493 if (!ret)
494 continue;
495 else if (ret < 0) {
496 if (errno != EAGAIN) {
497 perror(tip->fn);
498 fprintf(stderr, "server: failed read\n");
499 return 0;
500 }
501 continue;
502 } else {
503 buf += ret;
504 bytes_left -= ret;
505 }
506 } while (!is_done() && bytes_left);
507
410d7c62 508 return len - bytes_left;
8e86c98a
JA
509}
510
8e86c98a
JA
511static inline struct tip_subbuf *
512subbuf_fifo_dequeue(struct thread_information *tip)
a3e4d330 513{
21f55651
JA
514 const int head = tip->fifo.head;
515 const int next = (head + 1) & (FIFO_SIZE - 1);
516
517 if (head != tip->fifo.tail) {
518 struct tip_subbuf *ts = tip->fifo.q[head];
519
520 store_barrier();
521 tip->fifo.head = next;
522 return ts;
523 }
524
525 return NULL;
9db17354 526}
eb3c8108 527
21f55651
JA
528static inline int subbuf_fifo_queue(struct thread_information *tip,
529 struct tip_subbuf *ts)
9db17354 530{
21f55651
JA
531 const int tail = tip->fifo.tail;
532 const int next = (tail + 1) & (FIFO_SIZE - 1);
533
534 if (next != tip->fifo.head) {
535 tip->fifo.q[tail] = ts;
536 store_barrier();
537 tip->fifo.tail = next;
538 return 0;
539 }
540
541 fprintf(stderr, "fifo too small!\n");
542 return 1;
a3e4d330
JA
543}
544
b7106311
JA
545/*
546 * For file output, truncate and mmap the file appropriately
547 */
8e86c98a 548static int mmap_subbuf(struct thread_information *tip, unsigned int maxlen)
b7106311
JA
549{
550 int ofd = fileno(tip->ofile);
551 int ret;
6a6d3f0f 552 unsigned long nr;
b7106311
JA
553
554 /*
555 * extend file, if we have to. use chunks of 16 subbuffers.
556 */
6a6d3f0f 557 if (tip->fs_off + maxlen > tip->fs_buf_len) {
b7106311 558 if (tip->fs_buf) {
5975d309 559 munlock(tip->fs_buf, tip->fs_buf_len);
b7106311
JA
560 munmap(tip->fs_buf, tip->fs_buf_len);
561 tip->fs_buf = NULL;
562 }
563
6a6d3f0f
TZ
564 tip->fs_off = tip->fs_size & (tip->device->page_size - 1);
565 nr = max(16, tip->device->buf_nr);
566 tip->fs_buf_len = (nr * tip->device->buf_size) - tip->fs_off;
b7106311
JA
567 tip->fs_max_size += tip->fs_buf_len;
568
569 if (ftruncate(ofd, tip->fs_max_size) < 0) {
570 perror("ftruncate");
571 return -1;
572 }
573
574 tip->fs_buf = mmap(NULL, tip->fs_buf_len, PROT_WRITE,
575 MAP_SHARED, ofd, tip->fs_size - tip->fs_off);
576 if (tip->fs_buf == MAP_FAILED) {
577 perror("mmap");
578 return -1;
579 }
5975d309 580 mlock(tip->fs_buf, tip->fs_buf_len);
b7106311
JA
581 }
582
7934e668 583 ret = tip->read_data(tip, tip->fs_buf + tip->fs_off, maxlen);
b7106311 584 if (ret >= 0) {
dbfbd6db 585 tip->data_read += ret;
b7106311
JA
586 tip->fs_size += ret;
587 tip->fs_off += ret;
588 return 0;
589 }
590
591 return -1;
592}
593
18eed2a7
JA
594/*
595 * Use the copy approach for pipes and network
596 */
597static int get_subbuf(struct thread_information *tip, unsigned int maxlen)
598{
599 struct tip_subbuf *ts = malloc(sizeof(*ts));
600 int ret;
601
6a6d3f0f 602 ts->buf = malloc(tip->device->buf_size);
18eed2a7
JA
603 ts->max_len = maxlen;
604
7934e668 605 ret = tip->read_data(tip, ts->buf, ts->max_len);
18eed2a7
JA
606 if (ret > 0) {
607 ts->len = ret;
dbfbd6db 608 tip->data_read += ret;
7035d92d 609 if (subbuf_fifo_queue(tip, ts))
2f064793
JA
610 ret = -1;
611 }
612
613 if (ret <= 0) {
614 free(ts->buf);
615 free(ts);
18eed2a7
JA
616 }
617
618 return ret;
619}
620
9db17354 621static void close_thread(struct thread_information *tip)
a3e4d330 622{
9db17354
JA
623 if (tip->fd != -1)
624 close(tip->fd);
625 if (tip->ofile)
626 fclose(tip->ofile);
627 if (tip->ofile_buffer)
628 free(tip->ofile_buffer);
629 if (tip->fd_buf)
630 free(tip->fd_buf);
1c99bc21 631
9db17354
JA
632 tip->fd = -1;
633 tip->ofile = NULL;
634 tip->ofile_buffer = NULL;
635 tip->fd_buf = NULL;
a3e4d330
JA
636}
637
8e86c98a
JA
638static void tip_ftrunc_final(struct thread_information *tip)
639{
640 /*
641 * truncate to right size and cleanup mmap
642 */
c196b5f2 643 if (tip->ofile_mmap && tip->ofile) {
8e86c98a
JA
644 int ofd = fileno(tip->ofile);
645
646 if (tip->fs_buf)
647 munmap(tip->fs_buf, tip->fs_buf_len);
648
649 ftruncate(ofd, tip->fs_size);
650 }
651}
652
9db17354 653static void *thread_main(void *arg)
a3e4d330 654{
9db17354
JA
655 struct thread_information *tip = arg;
656 pid_t pid = getpid();
657 cpu_set_t cpu_mask;
a3e4d330 658
9db17354
JA
659 CPU_ZERO(&cpu_mask);
660 CPU_SET((tip->cpu), &cpu_mask);
a3e4d330 661
9db17354
JA
662 if (sched_setaffinity(pid, sizeof(cpu_mask), &cpu_mask) == -1) {
663 perror("sched_setaffinity");
664 exit_trace(1);
665 }
a3e4d330 666
9db17354 667 snprintf(tip->fn, sizeof(tip->fn), "%s/block/%s/trace%d",
3d06efea 668 debugfs_path, tip->device->buts_name, tip->cpu);
9db17354
JA
669 tip->fd = open(tip->fn, O_RDONLY);
670 if (tip->fd < 0) {
671 perror(tip->fn);
672 fprintf(stderr,"Thread %d failed open of %s\n", tip->cpu,
673 tip->fn);
674 exit_trace(1);
a3e4d330
JA
675 }
676
b7106311 677 while (!is_done()) {
6a6d3f0f 678 if (tip->get_subbuf(tip, tip->device->buf_size) < 0)
0cc7d25e 679 break;
b7106311
JA
680 }
681
7035d92d
JA
682 /*
683 * trace is stopped, pull data until we get a short read
684 */
6a6d3f0f 685 while (tip->get_subbuf(tip, tip->device->buf_size) > 0)
7035d92d
JA
686 ;
687
8e86c98a
JA
688 tip_ftrunc_final(tip);
689 tip->exited = 1;
690 return NULL;
691}
b7106311 692
8e86c98a
JA
/*
 * Send exactly buf_len bytes from buf over the socket fd, looping over
 * partial sends. Returns 0 once everything is sent, 1 if send() fails.
 */
static int write_data_net(int fd, void *buf, unsigned int buf_len)
{
	const char *data = buf;
	unsigned int sent = 0;

	while (sent < buf_len) {
		int n = send(fd, data + sent, buf_len - sent, 0);

		if (n < 0) {
			perror("send");
			return 1;
		}

		sent += n;
	}

	return 0;
}
711
32f18c48 712static int net_send_header(struct thread_information *tip, unsigned int len)
8e86c98a
JA
713{
714 struct blktrace_net_hdr hdr;
8e86c98a
JA
715
716 hdr.magic = BLK_IO_TRACE_MAGIC;
22cd0c02 717 strcpy(hdr.buts_name, tip->device->buts_name);
8e86c98a 718 hdr.cpu = tip->cpu;
22cd0c02 719 hdr.max_cpus = ncpus;
32f18c48 720 hdr.len = len;
ff11d54c 721 hdr.cl_id = getpid();
6a6d3f0f
TZ
722 hdr.buf_size = tip->device->buf_size;
723 hdr.buf_nr = tip->device->buf_nr;
724 hdr.page_size = tip->device->page_size;
725
ff11d54c 726 return write_data_net(net_out_fd[tip->cpu], &hdr, sizeof(hdr));
32f18c48 727}
8e86c98a 728
6a752c90
JA
729/*
730 * send header with 0 length to signal end-of-run
731 */
732static void net_client_send_close(void)
733{
7934e668 734 struct device_information *dip;
6a752c90 735 struct blktrace_net_hdr hdr;
7934e668 736 int i;
6a752c90 737
7934e668 738 for_each_dip(dip, i) {
7ab2f837
JA
739 hdr.magic = BLK_IO_TRACE_MAGIC;
740 hdr.max_cpus = ncpus;
741 hdr.len = 0;
7934e668 742 strcpy(hdr.buts_name, dip->buts_name);
7ab2f837 743 hdr.cpu = get_dropped_count(dip->buts_name);
ff11d54c 744 hdr.cl_id = getpid();
6a6d3f0f
TZ
745 hdr.buf_size = dip->buf_size;
746 hdr.buf_nr = dip->buf_nr;
747 hdr.page_size = dip->page_size;
7ab2f837 748
ff11d54c 749 write_data_net(net_out_fd[0], &hdr, sizeof(hdr));
7934e668
JA
750 }
751
6a752c90
JA
752}
753
32f18c48
JA
754static int flush_subbuf_net(struct thread_information *tip,
755 struct tip_subbuf *ts)
756{
757 if (net_send_header(tip, ts->len))
7934e668 758 return -1;
ff11d54c 759 if (write_data_net(net_out_fd[tip->cpu], ts->buf, ts->len))
7934e668 760 return -1;
8e86c98a 761
f0597a7e 762 free(ts->buf);
8e86c98a 763 free(ts);
7934e668 764 return 1;
8e86c98a
JA
765}
766
f6fead25
JA
767static int net_sendfile(struct thread_information *tip, struct tip_subbuf *ts)
768{
ff11d54c 769 int ret = sendfile(net_out_fd[tip->cpu], tip->fd, NULL, ts->len);
11629347
JA
770
771 if (ret < 0) {
772 perror("sendfile");
773 return 1;
774 } else if (ret < (int) ts->len) {
775 fprintf(stderr, "short sendfile send (%d of %d)\n", ret, ts->len);
776 return 1;
777 }
778
779 return 0;
780}
781
32f18c48
JA
782static int flush_subbuf_sendfile(struct thread_information *tip,
783 struct tip_subbuf *ts)
784{
7934e668 785 int ret = -1;
18eed2a7 786
f6fead25 787 if (net_send_header(tip, ts->len))
11629347 788 goto err;
f6fead25 789 if (net_sendfile(tip, ts))
11629347 790 goto err;
32f18c48 791
f6fead25 792 tip->data_read += ts->len;
7934e668 793 ret = 1;
11629347 794err:
32f18c48 795 free(ts);
11629347 796 return ret;
32f18c48
JA
797}
798
ff11d54c 799static int get_subbuf_sendfile(struct thread_information *tip,
d042efdf 800 __attribute__((__unused__)) unsigned int maxlen)
ff11d54c
TZ
801{
802 struct tip_subbuf *ts;
803 struct stat sb;
804 unsigned int ready;
ff11d54c 805
2689be58 806 wait_for_data(tip, -1);
ff11d54c
TZ
807
808 if (fstat(tip->fd, &sb) < 0) {
809 perror("trace stat");
810 return -1;
811 }
4aeec019 812
ff11d54c
TZ
813 ready = sb.st_size - tip->data_queued;
814 if (!ready) {
815 usleep(1000);
816 return 0;
817 }
818
819 ts = malloc(sizeof(*ts));
820 ts->buf = NULL;
821 ts->max_len = 0;
822 ts->len = ready;
823 tip->data_queued += ready;
824
d042efdf
JA
825 if (flush_subbuf_sendfile(tip, ts) < 0)
826 return -1;
ff11d54c
TZ
827
828 return ready;
829}
830
8e86c98a
JA
831static int write_data(struct thread_information *tip, void *buf,
832 unsigned int buf_len)
8a43bac5 833{
7126171a 834 int ret;
8a43bac5 835
6480258a
JA
836 if (!buf_len)
837 return 0;
838
1452478f
JA
839 ret = fwrite(buf, buf_len, 1, tip->ofile);
840 if (ferror(tip->ofile) || ret != 1) {
841 perror("fwrite");
842 clearerr(tip->ofile);
843 return 1;
d0ca268b
JA
844 }
845
9db17354 846 if (tip->ofile_stdout)
7126171a
JA
847 fflush(tip->ofile);
848
8a43bac5
JA
849 return 0;
850}
851
8e86c98a
JA
852static int flush_subbuf_file(struct thread_information *tip,
853 struct tip_subbuf *ts)
8a43bac5 854{
9db17354
JA
855 unsigned int offset = 0;
856 struct blk_io_trace *t;
857 int pdu_len, events = 0;
8a43bac5 858
9db17354 859 /*
7de86b12 860 * surplus from last run
9db17354 861 */
7de86b12
AB
862 if (tip->leftover_ts) {
863 struct tip_subbuf *prev_ts = tip->leftover_ts;
864
9e8b753c 865 if (prev_ts->len + ts->len > prev_ts->max_len) {
7de86b12
AB
866 prev_ts->max_len += ts->len;
867 prev_ts->buf = realloc(prev_ts->buf, prev_ts->max_len);
868 }
869
9e8b753c 870 memcpy(prev_ts->buf + prev_ts->len, ts->buf, ts->len);
7de86b12
AB
871 prev_ts->len += ts->len;
872
873 free(ts->buf);
874 free(ts);
875
876 ts = prev_ts;
877 tip->leftover_ts = NULL;
9db17354 878 }
d0ca268b 879
9db17354
JA
880 while (offset + sizeof(*t) <= ts->len) {
881 t = ts->buf + offset;
3a9d6c13 882
9cfa6c2b
AB
883 if (verify_trace(t)) {
884 write_data(tip, ts->buf, offset);
9db17354 885 return -1;
9cfa6c2b 886 }
3a9d6c13 887
9db17354 888 pdu_len = t->pdu_len;
3a9d6c13 889
9db17354 890 if (offset + sizeof(*t) + pdu_len > ts->len)
3a9d6c13 891 break;
4b5db44a 892
9db17354
JA
893 offset += sizeof(*t) + pdu_len;
894 tip->events_processed++;
b7106311 895 tip->data_read += sizeof(*t) + pdu_len;
9db17354 896 events++;
3a9d6c13
JA
897 }
898
9cfa6c2b
AB
899 if (write_data(tip, ts->buf, offset))
900 return -1;
901
3a9d6c13 902 /*
9db17354 903 * leftover bytes, save them for next time
3a9d6c13 904 */
9db17354 905 if (offset != ts->len) {
7de86b12 906 tip->leftover_ts = ts;
9e8b753c
JA
907 ts->len -= offset;
908 memmove(ts->buf, ts->buf + offset, ts->len);
7de86b12
AB
909 } else {
910 free(ts->buf);
911 free(ts);
9db17354 912 }
4b5db44a 913
9db17354 914 return events;
4b5db44a
JA
915}
916
9db17354 917static int write_tip_events(struct thread_information *tip)
d5396421 918{
21f55651 919 struct tip_subbuf *ts = subbuf_fifo_dequeue(tip);
d5396421 920
0cc7d25e
JA
921 if (ts)
922 return tip->flush_subbuf(tip, ts);
91816d54 923
9db17354 924 return 0;
91816d54
JA
925}
926
9db17354
JA
927/*
928 * scans the tips we know and writes out the subbuffers we accumulate
929 */
930static void get_and_write_events(void)
d0ca268b 931{
9db17354
JA
932 struct device_information *dip;
933 struct thread_information *tip;
27223f19 934 int i, j, events, ret, tips_running;
d0ca268b 935
9db17354
JA
936 while (!is_done()) {
937 events = 0;
d0ca268b 938
9db17354
JA
939 for_each_dip(dip, i) {
940 for_each_tip(dip, tip, j) {
941 ret = write_tip_events(tip);
942 if (ret > 0)
943 events += ret;
944 }
945 }
d0ca268b 946
9db17354 947 if (!events)
7934e668 948 usleep(100000);
d0ca268b
JA
949 }
950
a3e4d330 951 /*
9db17354 952 * reap stored events
a3e4d330 953 */
9db17354
JA
954 do {
955 events = 0;
27223f19 956 tips_running = 0;
9db17354
JA
957 for_each_dip(dip, i) {
958 for_each_tip(dip, tip, j) {
959 ret = write_tip_events(tip);
960 if (ret > 0)
961 events += ret;
27223f19 962 tips_running += !tip->exited;
9db17354 963 }
69e65a9e 964 }
9db17354 965 usleep(10);
27223f19 966 } while (events || tips_running);
d0ca268b
JA
967}
968
b7106311
JA
969static void wait_for_threads(void)
970{
971 /*
8e86c98a
JA
972 * for piped or network output, poll and fetch data for writeout.
973 * for files, we just wait around for trace threads to exit
b7106311 974 */
8e86c98a 975 if ((output_name && !strcmp(output_name, "-")) ||
ff11d54c 976 ((net_mode == Net_client) && !net_use_sendfile))
b7106311
JA
977 get_and_write_events();
978 else {
979 struct device_information *dip;
980 struct thread_information *tip;
981 int i, j, tips_running;
982
983 do {
984 tips_running = 0;
7934e668 985 usleep(100000);
b7106311
JA
986
987 for_each_dip(dip, i)
988 for_each_tip(dip, tip, j)
989 tips_running += !tip->exited;
990 } while (tips_running);
991 }
6a752c90
JA
992
993 if (net_mode == Net_client)
994 net_client_send_close();
b7106311
JA
995}
996
97159c02
JA
997static int fill_ofname(struct device_information *dip,
998 struct thread_information *tip, char *dst,
e3bf54d8 999 char *buts_name)
8e86c98a 1000{
e3bf54d8 1001 struct stat sb;
8e86c98a
JA
1002 int len = 0;
1003
1004 if (output_dir)
1005 len = sprintf(dst, "%s/", output_dir);
dd870ef6
AB
1006 else
1007 len = sprintf(dst, "./");
8e86c98a 1008
e3bf54d8 1009 if (net_mode == Net_server) {
ff11d54c 1010 struct net_connection *nc = tip->nc;
e0a1988b 1011
ff11d54c
TZ
1012 len += sprintf(dst + len, "%s-", inet_ntoa(nc->ch->cl_in_addr));
1013 len += strftime(dst + len, 64, "%F-%T/", gmtime(&dip->cl_connect_time));
e3bf54d8
JA
1014 }
1015
1016 if (stat(dst, &sb) < 0) {
1017 if (errno != ENOENT) {
1018 perror("stat");
1019 return 1;
1020 }
1021 if (mkdir(dst, 0755) < 0) {
1022 perror(dst);
1023 fprintf(stderr, "Can't make output dir\n");
1024 return 1;
1025 }
1026 }
1027
8e86c98a 1028 if (output_name)
e3bf54d8 1029 sprintf(dst + len, "%s.blktrace.%d", output_name, tip->cpu);
8e86c98a 1030 else
e3bf54d8
JA
1031 sprintf(dst + len, "%s.blktrace.%d", buts_name, tip->cpu);
1032
1033 return 0;
8e86c98a
JA
1034}
1035
0cc7d25e
JA
1036static void fill_ops(struct thread_information *tip)
1037{
1038 /*
1039 * setup ops
1040 */
32f18c48 1041 if (net_mode == Net_client) {
36808255 1042 if (net_use_sendfile) {
32f18c48 1043 tip->get_subbuf = get_subbuf_sendfile;
ff11d54c 1044 tip->flush_subbuf = NULL;
32f18c48
JA
1045 } else {
1046 tip->get_subbuf = get_subbuf;
1047 tip->flush_subbuf = flush_subbuf_net;
1048 }
1049 } else {
1050 if (tip->ofile_mmap)
1051 tip->get_subbuf = mmap_subbuf;
1052 else
1053 tip->get_subbuf = get_subbuf;
0cc7d25e 1054
0cc7d25e 1055 tip->flush_subbuf = flush_subbuf_file;
32f18c48
JA
1056 }
1057
0cc7d25e
JA
1058 if (net_mode == Net_server)
1059 tip->read_data = read_data_net;
1060 else
1061 tip->read_data = read_data_file;
1062}
1063
ddf22842
JA
1064static int tip_open_output(struct device_information *dip,
1065 struct thread_information *tip)
d0ca268b 1066{
ddf22842 1067 int pipeline = output_name && !strcmp(output_name, "-");
8e86c98a 1068 int mode, vbuf_size;
e3bf54d8 1069 char op[128];
d0ca268b 1070
ddf22842
JA
1071 if (net_mode == Net_client) {
1072 tip->ofile = NULL;
1073 tip->ofile_stdout = 0;
1074 tip->ofile_mmap = 0;
0c0b75b4 1075 goto done;
ddf22842
JA
1076 } else if (pipeline) {
1077 tip->ofile = fdopen(STDOUT_FILENO, "w");
1078 tip->ofile_stdout = 1;
1079 tip->ofile_mmap = 0;
1080 mode = _IOLBF;
1081 vbuf_size = 512;
1082 } else {
97159c02 1083 if (fill_ofname(dip, tip, op, dip->buts_name))
e3bf54d8 1084 return 1;
ddf22842
JA
1085 tip->ofile = fopen(op, "w+");
1086 tip->ofile_stdout = 0;
1087 tip->ofile_mmap = 1;
1088 mode = _IOFBF;
1089 vbuf_size = OFILE_BUF;
1090 }
d5396421 1091
0c0b75b4 1092 if (tip->ofile == NULL) {
ddf22842
JA
1093 perror(op);
1094 return 1;
1095 }
d5396421 1096
0c0b75b4
JA
1097 tip->ofile_buffer = malloc(vbuf_size);
1098 if (setvbuf(tip->ofile, tip->ofile_buffer, mode, vbuf_size)) {
1099 perror("setvbuf");
1100 close_thread(tip);
1101 return 1;
ddf22842
JA
1102 }
1103
0c0b75b4 1104done:
ddf22842
JA
1105 fill_ops(tip);
1106 return 0;
1107}
007c233c 1108
ddf22842
JA
1109static int start_threads(struct device_information *dip)
1110{
1111 struct thread_information *tip;
1112 int j;
1113
1114 for_each_tip(dip, tip, j) {
1115 tip->cpu = j;
1116 tip->device = dip;
1117 tip->events_processed = 0;
11eedd9b 1118 tip->fd = -1;
ddf22842
JA
1119 memset(&tip->fifo, 0, sizeof(tip->fifo));
1120 tip->leftover_ts = NULL;
1121
1122 if (tip_open_output(dip, tip))
1123 return 1;
0cc7d25e 1124
9db17354 1125 if (pthread_create(&tip->thread, NULL, thread_main, tip)) {
e7c9f3ff 1126 perror("pthread_create");
007c233c 1127 close_thread(tip);
e7c9f3ff 1128 return 1;
d0ca268b
JA
1129 }
1130 }
1131
e7c9f3ff 1132 return 0;
d0ca268b
JA
1133}
1134
e7c9f3ff 1135static void stop_threads(struct device_information *dip)
3aabcd89 1136{
e7c9f3ff 1137 struct thread_information *tip;
91816d54 1138 unsigned long ret;
007c233c
JA
1139 int i;
1140
9db17354 1141 for_each_tip(dip, tip, i) {
91816d54 1142 (void) pthread_join(tip->thread, (void *) &ret);
9db17354
JA
1143 close_thread(tip);
1144 }
3aabcd89
JA
1145}
1146
e7c9f3ff 1147static void stop_all_threads(void)
72ca8801 1148{
e7c9f3ff 1149 struct device_information *dip;
72ca8801
NS
1150 int i;
1151
99c1f5ab 1152 for_each_dip(dip, i)
e7c9f3ff
NS
1153 stop_threads(dip);
1154}
1155
1156static void stop_all_tracing(void)
1157{
1158 struct device_information *dip;
91816d54 1159 int i;
007c233c 1160
91816d54 1161 for_each_dip(dip, i)
e7c9f3ff 1162 stop_trace(dip);
72ca8801
NS
1163}
1164
1165static void exit_trace(int status)
1166{
eb3c8108
JA
1167 if (!is_trace_stopped()) {
1168 trace_stopped = 1;
1169 stop_all_threads();
1170 stop_all_tracing();
1171 }
1172
72ca8801
NS
1173 exit(status);
1174}
1175
e7c9f3ff
NS
1176static int resize_devices(char *path)
1177{
1178 int size = (ndevs + 1) * sizeof(struct device_information);
1179
1180 device_information = realloc(device_information, size);
1181 if (!device_information) {
1182 fprintf(stderr, "Out of memory, device %s (%d)\n", path, size);
1183 return 1;
1184 }
1185 device_information[ndevs].path = path;
1186 ndevs++;
1187 return 0;
1188}
1189
1190static int open_devices(void)
d0ca268b 1191{
e7c9f3ff 1192 struct device_information *dip;
d0ca268b 1193 int i;
d0ca268b 1194
99c1f5ab 1195 for_each_dip(dip, i) {
cf9208ea 1196 dip->fd = open(dip->path, O_RDONLY | O_NONBLOCK);
e7c9f3ff
NS
1197 if (dip->fd < 0) {
1198 perror(dip->path);
1199 return 1;
1200 }
6a6d3f0f
TZ
1201 dip->buf_size = buf_size;
1202 dip->buf_nr = buf_nr;
1203 dip->page_size = page_size;
e7c9f3ff 1204 }
99c1f5ab 1205
e7c9f3ff
NS
1206 return 0;
1207}
1208
1209static int start_devices(void)
1210{
1211 struct device_information *dip;
1212 int i, j, size;
1213
1214 size = ncpus * sizeof(struct thread_information);
1215 thread_information = malloc(size * ndevs);
1216 if (!thread_information) {
1217 fprintf(stderr, "Out of memory, threads (%d)\n", size * ndevs);
1218 return 1;
1219 }
d5396421 1220
99c1f5ab 1221 for_each_dip(dip, i) {
e7c9f3ff
NS
1222 if (start_trace(dip)) {
1223 close(dip->fd);
1224 fprintf(stderr, "Failed to start trace on %s\n",
1225 dip->path);
1226 break;
1227 }
1228 }
99c1f5ab 1229
e7c9f3ff 1230 if (i != ndevs) {
ce020676 1231 __for_each_dip(dip, device_information, i, j)
e7c9f3ff 1232 stop_trace(dip);
99c1f5ab 1233
e7c9f3ff
NS
1234 return 1;
1235 }
1236
99c1f5ab 1237 for_each_dip(dip, i) {
e7c9f3ff
NS
1238 dip->threads = thread_information + (i * ncpus);
1239 if (start_threads(dip)) {
1240 fprintf(stderr, "Failed to start worker threads\n");
1241 break;
1242 }
1243 }
99c1f5ab 1244
e7c9f3ff 1245 if (i != ndevs) {
ce020676 1246 __for_each_dip(dip, device_information, i, j)
e7c9f3ff 1247 stop_threads(dip);
99c1f5ab 1248 for_each_dip(dip, i)
e7c9f3ff 1249 stop_trace(dip);
99c1f5ab 1250
e7c9f3ff 1251 return 1;
d0ca268b
JA
1252 }
1253
e7c9f3ff 1254 return 0;
d0ca268b
JA
1255}
1256
e0a1988b 1257static void show_stats(struct device_information *dips, int ndips, int cpus)
e7c9f3ff 1258{
e7c9f3ff
NS
1259 struct device_information *dip;
1260 struct thread_information *tip;
b7106311 1261 unsigned long long events_processed, data_read;
eb3c8108 1262 unsigned long total_drops;
2f903295 1263 int i, j, no_stdout = 0;
eb3c8108
JA
1264
1265 if (is_stat_shown())
1266 return;
1267
2f903295
JA
1268 if (output_name && !strcmp(output_name, "-"))
1269 no_stdout = 1;
1270
eb3c8108 1271 stat_shown = 1;
428683db 1272
56070ea4 1273 total_drops = 0;
ce020676 1274 __for_each_dip(dip, dips, ndips, i) {
2f903295 1275 if (!no_stdout)
56070ea4 1276 printf("Device: %s\n", dip->path);
e7c9f3ff 1277 events_processed = 0;
b7106311 1278 data_read = 0;
ce020676 1279 __for_each_tip(dip, tip, cpus, j) {
2f903295 1280 if (!no_stdout)
b7106311
JA
1281 printf(" CPU%3d: %20lu events, %8llu KiB data\n",
1282 tip->cpu, tip->events_processed,
54824c20 1283 (tip->data_read + 1023) >> 10);
e7c9f3ff 1284 events_processed += tip->events_processed;
b7106311 1285 data_read += tip->data_read;
e7c9f3ff 1286 }
eb3c8108 1287 total_drops += dip->drop_count;
2f903295 1288 if (!no_stdout)
b7106311
JA
1289 printf(" Total: %20llu events (dropped %lu), %8llu KiB data\n",
1290 events_processed, dip->drop_count,
18d8437d 1291 (data_read + 1023) >> 10);
e7c9f3ff 1292 }
56070ea4
JA
1293
1294 if (total_drops)
1295 fprintf(stderr, "You have dropped events, consider using a larger buffer size (-b)\n");
e7c9f3ff 1296}
52724a0e 1297
e0a1988b 1298static struct device_information *net_get_dip(struct net_connection *nc,
6a6d3f0f 1299 struct blktrace_net_hdr *bnh)
8e86c98a 1300{
ff11d54c
TZ
1301 struct device_information *dip, *cl_dip = NULL;
1302 struct cl_host *ch = nc->ch;
8e86c98a
JA
1303 int i;
1304
ff11d54c
TZ
1305 for (i = 0; i < ch->ndevs; i++) {
1306 dip = &ch->device_information[i];
8e86c98a 1307
6a6d3f0f 1308 if (!strcmp(dip->buts_name, bnh->buts_name))
22cd0c02 1309 return dip;
ff11d54c 1310
6a6d3f0f 1311 if (dip->cl_id == bnh->cl_id)
ff11d54c 1312 cl_dip = dip;
8e86c98a
JA
1313 }
1314
ff11d54c
TZ
1315 ch->device_information = realloc(ch->device_information, (ch->ndevs + 1) * sizeof(*dip));
1316 dip = &ch->device_information[ch->ndevs];
921b05fe
JA
1317 memset(dip, 0, sizeof(*dip));
1318 dip->fd = -1;
ff11d54c 1319 dip->ch = ch;
6a6d3f0f
TZ
1320 dip->cl_id = bnh->cl_id;
1321 dip->buf_size = bnh->buf_size;
1322 dip->buf_nr = bnh->buf_nr;
1323 dip->page_size = bnh->page_size;
1324
ff11d54c
TZ
1325 if (cl_dip)
1326 dip->cl_connect_time = cl_dip->cl_connect_time;
1327 else
1328 dip->cl_connect_time = nc->connect_time;
6a6d3f0f
TZ
1329 strcpy(dip->buts_name, bnh->buts_name);
1330 dip->path = strdup(bnh->buts_name);
7ab2f837 1331 dip->trace_started = 1;
ff11d54c 1332 ch->ndevs++;
e0a1988b
JA
1333 dip->threads = malloc(nc->ncpus * sizeof(struct thread_information));
1334 memset(dip->threads, 0, nc->ncpus * sizeof(struct thread_information));
22cd0c02
JA
1335
1336 /*
1337 * open all files
1338 */
e0a1988b 1339 for (i = 0; i < nc->ncpus; i++) {
22cd0c02 1340 struct thread_information *tip = &dip->threads[i];
8e86c98a 1341
22cd0c02 1342 tip->cpu = i;
22cd0c02 1343 tip->device = dip;
1366e53a 1344 tip->fd = -1;
ff11d54c
TZ
1345 tip->nc = nc;
1346
ddf22842 1347 if (tip_open_output(dip, tip))
22cd0c02 1348 return NULL;
ff11d54c
TZ
1349
1350 tip->nc = NULL;
8e86c98a
JA
1351 }
1352
22cd0c02
JA
1353 return dip;
1354}
1355
e0a1988b
JA
1356static struct thread_information *net_get_tip(struct net_connection *nc,
1357 struct blktrace_net_hdr *bnh)
22cd0c02
JA
1358{
1359 struct device_information *dip;
ff11d54c 1360 struct thread_information *tip;
22cd0c02 1361
6a6d3f0f 1362 dip = net_get_dip(nc, bnh);
7ab2f837
JA
1363 if (!dip->trace_started) {
1364 fprintf(stderr, "Events for closed devices %s\n", dip->buts_name);
1365 return NULL;
1366 }
1367
ff11d54c
TZ
1368 tip = &dip->threads[bnh->cpu];
1369 if (!tip->nc)
1370 tip->nc = nc;
1371
1372 return tip;
8e86c98a
JA
1373}
1374
e0a1988b
JA
1375static int net_get_header(struct net_connection *nc,
1376 struct blktrace_net_hdr *bnh)
8e86c98a 1377{
e0a1988b 1378 int fl = fcntl(nc->in_fd, F_GETFL);
8e86c98a
JA
1379 int bytes_left, ret;
1380 void *p = bnh;
1381
e0a1988b 1382 fcntl(nc->in_fd, F_SETFL, fl | O_NONBLOCK);
8e86c98a
JA
1383 bytes_left = sizeof(*bnh);
1384 while (bytes_left && !is_done()) {
e0a1988b 1385 ret = recv(nc->in_fd, p, bytes_left, MSG_WAITALL);
8e86c98a
JA
1386 if (ret < 0) {
1387 if (errno != EAGAIN) {
1388 perror("recv header");
1389 return 1;
1390 }
7934e668 1391 usleep(1000);
8e86c98a
JA
1392 continue;
1393 } else if (!ret) {
7934e668 1394 usleep(1000);
8e86c98a
JA
1395 continue;
1396 } else {
1397 p += ret;
1398 bytes_left -= ret;
1399 }
1400 }
e0a1988b 1401 fcntl(nc->in_fd, F_SETFL, fl & ~O_NONBLOCK);
227f89ff 1402 return bytes_left;
8e86c98a
JA
1403}
1404
e0a1988b
JA
1405/*
1406 * finalize a net client: truncate files, show stats, cleanup, etc
1407 */
ff11d54c 1408static void device_done(struct net_connection *nc, struct device_information *dip)
e0a1988b 1409{
e0a1988b 1410 struct thread_information *tip;
ff11d54c 1411 int i;
e0a1988b 1412
ff11d54c
TZ
1413 __for_each_tip(dip, tip, nc->ncpus, i)
1414 tip_ftrunc_final(tip);
e0a1988b 1415
ff11d54c 1416 show_stats(dip, 1, nc->ncpus);
e0a1988b
JA
1417
1418 /*
1419 * cleanup for next run
1420 */
ff11d54c
TZ
1421 __for_each_tip(dip, tip, nc->ncpus, i) {
1422 if (tip->ofile)
1423 fclose(tip->ofile);
e0a1988b
JA
1424 }
1425
ff11d54c
TZ
1426 free(dip->threads);
1427 free(dip->path);
e0a1988b
JA
1428
1429 close(nc->in_fd);
1430 nc->in_fd = -1;
1431
ff11d54c
TZ
1432 stat_shown = 0;
1433}
e0a1988b 1434
ff11d54c
TZ
/*
 * Return 1 if both IPv4 addresses are the same, 0 otherwise.
 */
static inline int in_addr_eq(struct in_addr a, struct in_addr b)
{
	if (a.s_addr != b.s_addr)
		return 0;

	return 1;
}
1439
1440static void net_add_client_host(struct cl_host *ch)
1441{
1442 ch->list_next = cl_host_list;
1443 cl_host_list = ch;
1444 cl_hosts++;
1445}
1446
1447static void net_remove_client_host(struct cl_host *ch)
1448{
1449 struct cl_host *p, *c;
1450
1451 for (p = c = cl_host_list; c; c = c->list_next) {
1452 if (c == ch) {
1453 if (p == c)
1454 cl_host_list = c->list_next;
1455 else
1456 p->list_next = c->list_next;
1457 cl_hosts--;
1458 return;
1459 }
1460 p = c;
e0a1988b 1461 }
ff11d54c 1462}
e0a1988b 1463
ff11d54c
TZ
1464static struct cl_host *net_find_client_host(struct in_addr cl_in_addr)
1465{
1466 struct cl_host *ch = cl_host_list;
1467
1468 while (ch) {
1469 if (in_addr_eq(ch->cl_in_addr, cl_in_addr))
1470 return ch;
1471 ch = ch->list_next;
1472 }
1473
1474 return NULL;
1475}
1476
ff11d54c
TZ
1477static void net_client_host_done(struct cl_host *ch)
1478{
1479 free(ch->device_information);
1480 free(ch->net_connections);
1481 net_connects -= ch->nconn;
1482 net_remove_client_host(ch);
1483 free(ch);
e0a1988b
JA
1484}
1485
1486/*
1487 * handle incoming events from a net client
1488 */
1489static int net_client_data(struct net_connection *nc)
8e86c98a
JA
1490{
1491 struct thread_information *tip;
1492 struct blktrace_net_hdr bnh;
1493
e0a1988b 1494 if (net_get_header(nc, &bnh))
8e86c98a
JA
1495 return 1;
1496
1497 if (data_is_native == -1 && check_data_endianness(bnh.magic)) {
1498 fprintf(stderr, "server: received data is bad\n");
1499 return 1;
1500 }
1501
1502 if (!data_is_native) {
227f89ff 1503 bnh.magic = be32_to_cpu(bnh.magic);
8e86c98a 1504 bnh.cpu = be32_to_cpu(bnh.cpu);
4aeec019 1505 bnh.max_cpus = be32_to_cpu(bnh.max_cpus);
8e86c98a 1506 bnh.len = be32_to_cpu(bnh.len);
ff11d54c 1507 bnh.cl_id = be32_to_cpu(bnh.cl_id);
6a6d3f0f
TZ
1508 bnh.buf_size = be32_to_cpu(bnh.buf_size);
1509 bnh.buf_nr = be32_to_cpu(bnh.buf_nr);
1510 bnh.page_size = be32_to_cpu(bnh.page_size);
8e86c98a
JA
1511 }
1512
227f89ff
JA
1513 if ((bnh.magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) {
1514 fprintf(stderr, "server: bad data magic\n");
1515 return 1;
1516 }
1517
4aeec019
JA
1518 if (nc->ncpus == -1)
1519 nc->ncpus = bnh.max_cpus;
1520
6a752c90
JA
1521 /*
1522 * len == 0 means that the other end signalled end-of-run
1523 */
1524 if (!bnh.len) {
7934e668
JA
1525 /*
1526 * overload cpu count with dropped events
1527 */
1528 struct device_information *dip;
1529
6a6d3f0f 1530 dip = net_get_dip(nc, &bnh);
7934e668 1531 dip->drop_count = bnh.cpu;
7ab2f837 1532 dip->trace_started = 0;
7934e668 1533
e0a1988b 1534 printf("server: end of run for %s\n", dip->buts_name);
4aeec019 1535
ff11d54c
TZ
1536 device_done(nc, dip);
1537
1538 if (++nc->ch->ndevs_done == nc->ch->ndevs)
1539 net_client_host_done(nc->ch);
4aeec019 1540
e0a1988b 1541 return 0;
6a752c90
JA
1542 }
1543
e0a1988b 1544 tip = net_get_tip(nc, &bnh);
8e86c98a
JA
1545 if (!tip)
1546 return 1;
1547
1548 if (mmap_subbuf(tip, bnh.len))
1549 return 1;
1550
1551 return 0;
1552}
1553
e0a1988b 1554static void net_add_connection(int listen_fd, struct sockaddr_in *addr)
659bcc3f 1555{
e0a1988b
JA
1556 socklen_t socklen = sizeof(*addr);
1557 struct net_connection *nc;
ff11d54c
TZ
1558 struct cl_host *ch;
1559 int in_fd;
659bcc3f 1560
ff11d54c
TZ
1561 in_fd = accept(listen_fd, (struct sockaddr *) addr, &socklen);
1562 if (in_fd < 0) {
1563 perror("accept");
e0a1988b 1564 return;
659bcc3f 1565 }
659bcc3f 1566
ff11d54c
TZ
1567 ch = net_find_client_host(addr->sin_addr);
1568 if (!ch) {
1569 if (cl_hosts == NET_MAX_CL_HOSTS) {
1570 fprintf(stderr, "server: no more clients allowed\n");
1571 return;
1572 }
1573 ch = malloc(sizeof(struct cl_host));
1574 memset(ch, 0, sizeof(*ch));
1575 ch->cl_in_addr = addr->sin_addr;
1576 net_add_client_host(ch);
506cdb6d
JA
1577
1578 printf("server: connection from %s\n", inet_ntoa(addr->sin_addr));
659bcc3f
JA
1579 }
1580
ff11d54c
TZ
1581 ch->net_connections = realloc(ch->net_connections, (ch->nconn + 1) * sizeof(*nc));
1582 nc = &ch->net_connections[ch->nconn++];
1583 memset(nc, 0, sizeof(*nc));
1584
e0a1988b 1585 time(&nc->connect_time);
ff11d54c
TZ
1586 nc->ch = ch;
1587 nc->in_fd = in_fd;
4aeec019 1588 nc->ncpus = -1;
e0a1988b
JA
1589 net_connects++;
1590}
1591
1592/*
1593 * event driven loop, handle new incoming connections and data from
1594 * existing connections
1595 */
1596static void net_server_handle_connections(int listen_fd,
1597 struct sockaddr_in *addr)
1598{
ff11d54c
TZ
1599 struct pollfd *pfds = NULL;
1600 struct net_connection **ncs = NULL;
1601 int max_connects = 0;
1602 int i, nconns, events;
1603 struct cl_host *ch;
1604 struct net_connection *nc;
1605
e0a1988b
JA
1606 printf("server: waiting for connections...\n");
1607
1608 while (!is_done()) {
ff11d54c
TZ
1609 if (net_connects >= max_connects) {
1610 pfds = realloc(pfds, (net_connects + 1) * sizeof(*pfds));
1611 ncs = realloc(ncs, (net_connects + 1) * sizeof(*ncs));
1612 max_connects = net_connects + 1;
1613 }
e0a1988b
JA
1614 /*
1615 * the zero entry is for incoming connections, remaining
1616 * entries for clients
1617 */
1618 pfds[0].fd = listen_fd;
1619 pfds[0].events = POLLIN;
ff11d54c
TZ
1620 nconns = 0;
1621 for_each_cl_host(ch) {
1622 for (i = 0; i < ch->nconn; i++) {
1623 nc = &ch->net_connections[i];
1624 pfds[nconns + 1].fd = nc->in_fd;
1625 pfds[nconns + 1].events = POLLIN;
1626 ncs[nconns++] = nc;
1627 }
e0a1988b
JA
1628 }
1629
ff11d54c 1630 events = poll(pfds, 1 + nconns, -1);
e0a1988b
JA
1631 if (events < 0) {
1632 if (errno == EINTR)
1633 continue;
1634
1635 perror("poll");
1636 break;
1637 } else if (!events)
1638 continue;
1639
1640 if (pfds[0].revents & POLLIN) {
1641 net_add_connection(listen_fd, addr);
1642 events--;
1643 }
1644
ff11d54c 1645 for (i = 0; events && i < nconns; i++) {
e0a1988b 1646 if (pfds[i + 1].revents & POLLIN) {
ff11d54c 1647 net_client_data(ncs[i]);
e0a1988b
JA
1648 events--;
1649 }
1650 }
1651 }
659bcc3f
JA
1652}
1653
8e86c98a
JA
1654/*
1655 * Start here when we are in server mode - just fetch data from the network
1656 * and dump to files
1657 */
1658static int net_server(void)
1659{
1660 struct sockaddr_in addr;
e0a1988b 1661 int fd, opt;
8e86c98a
JA
1662
1663 fd = socket(AF_INET, SOCK_STREAM, 0);
1664 if (fd < 0) {
1665 perror("server: socket");
1666 return 1;
1667 }
1668
1669 opt = 1;
1670 if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) < 0) {
1671 perror("setsockopt");
1672 return 1;
1673 }
1674
1675 memset(&addr, 0, sizeof(addr));
1676 addr.sin_family = AF_INET;
1677 addr.sin_addr.s_addr = htonl(INADDR_ANY);
1678 addr.sin_port = htons(net_port);
1679
1680 if (bind(fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
1681 perror("bind");
1682 return 1;
1683 }
1684
1685 if (listen(fd, 1) < 0) {
1686 perror("listen");
1687 return 1;
1688 }
1689
e0a1988b
JA
1690 net_server_handle_connections(fd, &addr);
1691 return 0;
8e86c98a
JA
1692}
1693
1694/*
1695 * Setup outgoing network connection where we will transmit data
1696 */
ff11d54c 1697static int net_setup_client_cpu(int i, struct sockaddr_in *addr)
8e86c98a 1698{
8e86c98a
JA
1699 int fd;
1700
1701 fd = socket(AF_INET, SOCK_STREAM, 0);
1702 if (fd < 0) {
1703 perror("client: socket");
1704 return 1;
1705 }
1706
ff11d54c
TZ
1707 if (connect(fd, (struct sockaddr *) addr, sizeof(*addr)) < 0) {
1708 perror("client: connect");
1709 return 1;
1710 }
1711
1712 net_out_fd[i] = fd;
1713 return 0;
1714}
1715
1716static int net_setup_client(void)
1717{
1718 struct sockaddr_in addr;
1719 int i;
1720
8e86c98a
JA
1721 memset(&addr, 0, sizeof(addr));
1722 addr.sin_family = AF_INET;
1723 addr.sin_port = htons(net_port);
1724
1725 if (inet_aton(hostname, &addr.sin_addr) != 1) {
1726 struct hostent *hent = gethostbyname(hostname);
1727 if (!hent) {
1728 perror("gethostbyname");
1729 return 1;
1730 }
1731
1732 memcpy(&addr.sin_addr, hent->h_addr, 4);
1733 strcpy(hostname, hent->h_name);
1734 }
1735
1736 printf("blktrace: connecting to %s\n", hostname);
1737
ff11d54c
TZ
1738 net_out_fd = malloc(ncpus * sizeof(*net_out_fd));
1739 for (i = 0; i < ncpus; i++) {
1740 if (net_setup_client_cpu(i, &addr))
1741 return 1;
8e86c98a
JA
1742 }
1743
1744 printf("blktrace: connected!\n");
ff11d54c 1745
8e86c98a
JA
1746 return 0;
1747}
1748
/*
 * Option summary printed by show_usage(). Adjacent string literals are
 * concatenated by the compiler, so no line continuations are needed.
 */
static char usage_str[] =
	"-d <dev> [ -r debugfs path ] [ -o <output> ] [-k ] [ -w time ]\n"
	"[ -a action ] [ -A action mask ] [ -v ]\n\n"
	"\t-d Use specified device. May also be given last after options\n"
	"\t-r Path to mounted debugfs, defaults to /debug\n"
	"\t-o File(s) to send output to\n"
	"\t-D Directory to prepend to output file names\n"
	"\t-k Kill a running trace\n"
	"\t-w Stop after defined time, in seconds\n"
	"\t-a Only trace specified actions. See documentation\n"
	"\t-A Give trace mask as a single value. See documentation\n"
	"\t-b Sub buffer size in KiB\n"
	"\t-n Number of sub buffers\n"
	"\t-l Run in network listen mode (blktrace server)\n"
	"\t-h Run in network client mode, connecting to the given host\n"
	"\t-p Network port to use (default 8462)\n"
	"\t-s Make the network client NOT use sendfile() to transfer data\n"
	"\t-V Print program version info\n\n";
52724a0e 1767
ee1f4158
NS
1768static void show_usage(char *program)
1769{
52724a0e 1770 fprintf(stderr, "Usage: %s %s %s",program, blktrace_version, usage_str);
ee1f4158 1771}
d0ca268b
JA
1772
1773int main(int argc, char *argv[])
1774{
3d06efea 1775 static char default_debugfs_path[] = "/debug";
e3e74029 1776 struct statfs st;
d39c04ca 1777 int i, c;
ece238a6 1778 int stop_watch = 0;
d39c04ca
AB
1779 int act_mask_tmp = 0;
1780
1781 while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) >= 0) {
1782 switch (c) {
1783 case 'a':
1784 i = find_mask_map(optarg);
1785 if (i < 0) {
ab197ca7 1786 fprintf(stderr,"Invalid action mask %s\n",
d39c04ca 1787 optarg);
7425d456 1788 return 1;
d39c04ca
AB
1789 }
1790 act_mask_tmp |= i;
1791 break;
1792
1793 case 'A':
98f8386b
AB
1794 if ((sscanf(optarg, "%x", &i) != 1) ||
1795 !valid_act_opt(i)) {
d39c04ca 1796 fprintf(stderr,
ab197ca7 1797 "Invalid set action mask %s/0x%x\n",
d39c04ca 1798 optarg, i);
7425d456 1799 return 1;
d39c04ca
AB
1800 }
1801 act_mask_tmp = i;
1802 break;
d0ca268b 1803
d39c04ca 1804 case 'd':
e7c9f3ff
NS
1805 if (resize_devices(optarg) != 0)
1806 return 1;
d39c04ca
AB
1807 break;
1808
5270dddd 1809 case 'r':
3d06efea 1810 debugfs_path = optarg;
5270dddd
JA
1811 break;
1812
d5396421 1813 case 'o':
66efebf8 1814 output_name = optarg;
d5396421 1815 break;
bc39777c
JA
1816 case 'k':
1817 kill_running_trace = 1;
1818 break;
ece238a6
NS
1819 case 'w':
1820 stop_watch = atoi(optarg);
1821 if (stop_watch <= 0) {
1822 fprintf(stderr,
1823 "Invalid stopwatch value (%d secs)\n",
1824 stop_watch);
1825 return 1;
1826 }
1827 break;
57ea8602 1828 case 'V':
52724a0e
JA
1829 printf("%s version %s\n", argv[0], blktrace_version);
1830 return 0;
129aa440 1831 case 'b':
eb3c8108 1832 buf_size = strtoul(optarg, NULL, 10);
183a0855 1833 if (buf_size <= 0 || buf_size > 16*1024) {
129aa440 1834 fprintf(stderr,
eb3c8108 1835 "Invalid buffer size (%lu)\n",buf_size);
129aa440
JA
1836 return 1;
1837 }
1838 buf_size <<= 10;
1839 break;
1840 case 'n':
eb3c8108 1841 buf_nr = strtoul(optarg, NULL, 10);
129aa440
JA
1842 if (buf_nr <= 0) {
1843 fprintf(stderr,
eb3c8108 1844 "Invalid buffer nr (%lu)\n", buf_nr);
129aa440
JA
1845 return 1;
1846 }
1847 break;
d1d7f15f
JA
1848 case 'D':
1849 output_dir = optarg;
1850 break;
8e86c98a
JA
1851 case 'h':
1852 net_mode = Net_client;
1853 strcpy(hostname, optarg);
1854 break;
1855 case 'l':
1856 net_mode = Net_server;
1857 break;
1858 case 'p':
1859 net_port = atoi(optarg);
1860 break;
32f18c48 1861 case 's':
79971f43 1862 net_use_sendfile = 0;
32f18c48 1863 break;
d39c04ca 1864 default:
ee1f4158 1865 show_usage(argv[0]);
7425d456 1866 return 1;
d39c04ca
AB
1867 }
1868 }
1869
8e86c98a
JA
1870 setlocale(LC_NUMERIC, "en_US");
1871
1872 page_size = getpagesize();
1873
1874 if (net_mode == Net_server)
1875 return net_server();
1876
22cd0c02
JA
1877 while (optind < argc) {
1878 if (resize_devices(argv[optind++]) != 0)
1879 return 1;
1880 }
1881
e7c9f3ff 1882 if (ndevs == 0) {
ee1f4158 1883 show_usage(argv[0]);
7425d456 1884 return 1;
d39c04ca
AB
1885 }
1886
d5396421 1887 if (act_mask_tmp != 0)
d39c04ca 1888 act_mask = act_mask_tmp;
d0ca268b 1889
3d06efea
JA
1890 if (!debugfs_path)
1891 debugfs_path = default_debugfs_path;
1892
1893 if (statfs(debugfs_path, &st) < 0) {
e3e74029
NS
1894 perror("statfs");
1895 fprintf(stderr,"%s does not appear to be a valid path\n",
3d06efea 1896 debugfs_path);
e3e74029 1897 return 1;
3d06efea
JA
1898 } else if (st.f_type != (long) DEBUGFS_TYPE) {
1899 fprintf(stderr,"%s does not appear to be a debug filesystem\n",
1900 debugfs_path);
7425d456 1901 return 1;
d0ca268b
JA
1902 }
1903
e7c9f3ff 1904 if (open_devices() != 0)
7425d456 1905 return 1;
bc39777c
JA
1906
1907 if (kill_running_trace) {
e7c9f3ff 1908 stop_all_traces();
7425d456 1909 return 0;
bc39777c
JA
1910 }
1911
e7c9f3ff
NS
1912 ncpus = sysconf(_SC_NPROCESSORS_ONLN);
1913 if (ncpus < 0) {
1914 fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed\n");
7425d456 1915 return 1;
d0ca268b
JA
1916 }
1917
d0ca268b
JA
1918 signal(SIGINT, handle_sigint);
1919 signal(SIGHUP, handle_sigint);
1920 signal(SIGTERM, handle_sigint);
ece238a6 1921 signal(SIGALRM, handle_sigint);
38e1f0c6 1922 signal(SIGPIPE, SIG_IGN);
d0ca268b 1923
8e86c98a
JA
1924 if (net_mode == Net_client && net_setup_client())
1925 return 1;
1926
1927 if (start_devices() != 0)
1928 return 1;
1929
e7c9f3ff 1930 atexit(stop_all_tracing);
830fd65c 1931
ece238a6
NS
1932 if (stop_watch)
1933 alarm(stop_watch);
1934
b7106311 1935 wait_for_threads();
d0ca268b 1936
eb3c8108
JA
1937 if (!is_trace_stopped()) {
1938 trace_stopped = 1;
91816d54
JA
1939 stop_all_threads();
1940 stop_all_traces();
91816d54 1941 }
d0ca268b 1942
e0a1988b 1943 show_stats(device_information, ndevs, ncpus);
eb3c8108 1944
d0ca268b
JA
1945 return 0;
1946}
1947