2 * block queue tracing application
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5 * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 #include <sys/types.h>
30 #include <sys/ioctl.h>
31 #include <sys/param.h>
32 #include <sys/statfs.h>
35 #include <sys/socket.h>
42 #include <netinet/in.h>
43 #include <arpa/inet.h>
45 #include <sys/sendfile.h>
/* NOTE(review): this listing is a truncated extraction — interior source
 * lines are missing throughout; comments describe visible intent only. */
/* version string printed by -V and in usage output */
50 static char blktrace_version[] = "0.99.2";
53 * You may want to increase this even more, if you are logging at a high
54 * rate and see skipped/missed events
/* default per-CPU relay sub-buffer size, overridable with -b (KiB) */
56 #define BUF_SIZE (512 * 1024)
/* stdio vbuf size for regular (non-pipe) output files */
59 #define OFILE_BUF (128 * 1024)
/* debugfs filesystem magic; checked against statfs() f_type in main() */
61 #define DEBUGFS_TYPE 0x64626720
/* short-option string; long equivalents live in l_opts below */
63 #define S_OPTS "d:a:A:r:o:kw:Vb:n:D:lh:p:s"
/* long-option table mirroring S_OPTS; most entries' .name/.val fields are
 * missing from this truncated listing — only has_arg and a few names remain */
64 static struct option l_opts[] = {
67 .has_arg = required_argument,
73 .has_arg = required_argument,
79 .has_arg = required_argument,
85 .has_arg = required_argument,
91 .has_arg = required_argument,
97 .has_arg = no_argument,
103 .has_arg = required_argument,
109 .has_arg = no_argument,
/* -b: sub-buffer size in KiB */
114 .name = "buffer-size",
115 .has_arg = required_argument,
/* -n: number of sub-buffers */
120 .name = "num-sub-buffers",
121 .has_arg = required_argument,
/* -D: directory prepended to output file names */
126 .name = "output-dir",
127 .has_arg = required_argument,
133 .has_arg = no_argument,
139 .has_arg = required_argument,
145 .has_arg = required_argument,
/* -s: disable sendfile() in network client mode */
150 .name = "no-sendfile",
151 .has_arg = no_argument,
/* NOTE(review): the opening of struct tip_subbuf is missing from this
 * listing; max_len below is its visible tail (capacity of ts->buf). */
163 unsigned int max_len;
166 #define FIFO_SIZE (1024) /* should be plenty big! */
167 #define CL_SIZE (128) /* cache line, any bigger? */
/* single-producer/single-consumer subbuffer ring; head and tail are kept on
 * separate cache lines to avoid false sharing between the two threads */
169 struct tip_subbuf_fifo {
170 int tail __attribute__((aligned(CL_SIZE)));
171 int head __attribute__((aligned(CL_SIZE)));
172 struct tip_subbuf *q[FIFO_SIZE];
/* per-CPU, per-device trace reader state; one instance per (device, cpu) */
175 struct thread_information {
/* path of the debugfs per-cpu trace file (built in thread_main) */
181 char fn[MAXPATHLEN + 64];
/* ops vector, selected in fill_ops() according to run mode */
189 int (*get_subbuf)(struct thread_information *, unsigned int);
190 int (*flush_subbuf)(struct thread_information *, struct tip_subbuf *);
191 int (*read_data)(struct thread_information *, void *, unsigned int);
/* statistics reported by show_stats() */
193 unsigned long events_processed;
194 unsigned long long data_read;
/* bytes accounted but not yet sent (sendfile path) */
195 unsigned long long data_queued;
196 struct device_information *device;
/* ring of filled subbuffers plus any partial trace left from the last flush */
203 struct tip_subbuf_fifo fifo;
204 struct tip_subbuf *leftover_ts;
207 * mmap controlled output files
/* current size, ftruncate()d size, and mapping window of the output file */
209 unsigned long long fs_size;
210 unsigned long long fs_max_size;
211 unsigned long fs_off;
213 unsigned long fs_buf_len;
/* owning network connection, server mode only */
215 struct net_connection *nc;
/* per-traced-device state; one per -d argument (or per remote device in
 * server mode) */
218 struct device_information {
222 volatile int trace_started;
/* events dropped by the kernel, read back at trace stop */
223 unsigned long drop_count;
/* ncpus thread_information entries for this device */
224 struct thread_information *threads;
/* relay buffer geometry handed to BLKTRACESETUP */
225 unsigned long buf_size;
226 unsigned long buf_nr;
227 unsigned int page_size;
/* server mode: when the client connected (used in output dir name) */
231 time_t cl_connect_time;
/* flat arrays: ndevs devices, each with ncpus threads (see start_devices) */
235 static struct thread_information *thread_information;
237 static struct device_information *device_information;
239 /* command line option globals */
240 static char *debugfs_path;
241 static char *output_name;
242 static char *output_dir;
243 static int act_mask = ~0U;
244 static int kill_running_trace;
245 static unsigned long buf_size = BUF_SIZE;
246 static unsigned long buf_nr = BUF_NR;
247 static unsigned int page_size;
/* volatile reads force a fresh load; flags are set from the signal handler */
249 #define is_done() (*(volatile int *)(&done))
250 static volatile int done;
252 #define is_trace_stopped() (*(volatile int *)(&trace_stopped))
253 static volatile int trace_stopped;
255 #define is_stat_shown() (*(volatile int *)(&stat_shown))
256 static volatile int stat_shown;
/* -1 = undetermined; set after inspecting the first net header's magic */
258 int data_is_native = -1;
260 static void exit_trace(int status);
/* per-device tracing flag accessors (volatile read / plain write) */
262 #define dip_tracing(dip) (*(volatile int *)(&(dip)->trace_started))
263 #define dip_set_tracing(dip, v) ((dip)->trace_started = (v))
/* iterate device_information entries [0, __e) */
265 #define __for_each_dip(__d, __di, __e, __i) \
266 for (__i = 0, __d = __di; __i < __e; __i++, __d++)
268 #define for_each_dip(__d, __i) \
269 __for_each_dip(__d, device_information, ndevs, __i)
270 #define for_each_nc_dip(__nc, __d, __i) \
271 __for_each_dip(__d, (__nc)->ch->device_information, (__nc)->ch->ndevs, __i)
/* iterate a device's per-cpu thread_information entries */
273 #define __for_each_tip(__d, __t, __ncpus, __j) \
274 for (__j = 0, __t = (__d)->threads; __j < __ncpus; __j++, __t++)
275 #define for_each_tip(__d, __t, __j) \
276 __for_each_tip(__d, __t, ncpus, __j)
/* walk the singly-linked list of connected client hosts (server mode) */
277 #define for_each_cl_host(__c) \
278 for (__c = cl_host_list; __c; __c = __c->list_next)
281 * networking stuff follows. we include a magic number so we know whether
282 * to endianness convert or not
/* on-the-wire header preceding every chunk of trace data; byte-swapped on
 * receipt when the magic shows foreign endianness (see net_client_data) */
284 struct blktrace_net_hdr {
285 u32 magic; /* same as trace magic */
286 char buts_name[32]; /* trace name */
287 u32 cpu; /* for which cpu */
289 u32 len; /* length of following trace data */
290 u32 cl_id; /* id for set of client per-cpu connections */
291 u32 buf_size; /* client buf_size for this trace */
292 u32 buf_nr; /* client buf_nr for this trace */
293 u32 page_size; /* client page_size for this trace */
/* default TCP port, overridable with -p */
296 #define TRACE_NET_PORT (8462)
305 * network cmd line params
307 static char hostname[MAXHOSTNAMELEN];
308 static int net_port = TRACE_NET_PORT;
309 static int net_mode = 0;
310 static int net_use_sendfile = 1;
/* NOTE(review): struct cl_host opening is missing from this listing;
 * the fields below belong to it (one cl_host per connected client IP) */
313 struct cl_host *list_next;
314 struct in_addr cl_in_addr;
315 struct net_connection *net_connections;
317 struct device_information *device_information;
/* one per accepted socket; body elided in this listing */
323 struct net_connection {
/* hard cap on simultaneously connected client hosts */
331 #define NET_MAX_CL_HOSTS (1024)
332 static struct cl_host *cl_host_list;
334 static int net_connects;
/* client mode: one outgoing socket per cpu, indexed by tip->cpu */
336 static int *net_out_fd;
/* signal handler for SIGINT/SIGHUP/SIGTERM/SIGALRM (installed in main):
 * stop the kernel-side trace on each device so already-produced data can
 * still be reaped before teardown */
338 static void handle_sigint(__attribute__((__unused__)) int sig)
340 struct device_information *dip;
344 * stop trace so we can reap currently produced data
346 for_each_dip(dip, i) {
349 if (ioctl(dip->fd, BLKTRACESTOP) < 0)
350 perror("BLKTRACESTOP");
/* read the kernel's dropped-event count from
 * <debugfs>/block/<buts_name>/dropped; a missing file is tolerated (older
 * kernels without dropped-count support) */
356 static int get_dropped_count(const char *buts_name)
359 char tmp[MAXPATHLEN + 64];
361 snprintf(tmp, sizeof(tmp), "%s/block/%s/dropped",
362 debugfs_path, buts_name);
364 fd = open(tmp, O_RDONLY);
367 * this may be ok, if the kernel doesn't support dropped counts
372 fprintf(stderr, "Couldn't open dropped file %s\n", tmp);
/* tmp is reused as the read buffer for the count text */
376 if (read(fd, tmp, sizeof(tmp)) < 0) {
/* set up and start kernel block tracing on one device: BLKTRACESETUP with
 * the configured buffer geometry and action mask, then BLKTRACESTART; on
 * success records the kernel-assigned trace name and marks tracing active */
387 static int start_trace(struct device_information *dip)
389 struct blk_user_trace_setup buts;
391 memset(&buts, 0, sizeof(buts));
392 buts.buf_size = dip->buf_size;
393 buts.buf_nr = dip->buf_nr;
394 buts.act_mask = act_mask;
396 if (ioctl(dip->fd, BLKTRACESETUP, &buts) < 0) {
397 perror("BLKTRACESETUP");
401 if (ioctl(dip->fd, BLKTRACESTART) < 0) {
402 perror("BLKTRACESTART");
/* kernel fills buts.name; it names the debugfs block/<name>/ directory */
406 memcpy(dip->buts_name, buts.name, sizeof(dip->buts_name));
407 dip_set_tracing(dip, 1);
/* stop and tear down kernel tracing for one device; -k (kill_running_trace)
 * forces teardown even if we did not start the trace ourselves */
411 static void stop_trace(struct device_information *dip)
413 if (dip_tracing(dip) || kill_running_trace) {
414 dip_set_tracing(dip, 0);
417 * should be stopped, just don't complain if it isn't
419 ioctl(dip->fd, BLKTRACESTOP);
421 if (ioctl(dip->fd, BLKTRACETEARDOWN) < 0)
422 perror("BLKTRACETEARDOWN");
/* stop every device, capturing each one's dropped-event count first */
429 static void stop_all_traces(void)
431 struct device_information *dip;
434 for_each_dip(dip, i) {
435 dip->drop_count = get_dropped_count(dip->buts_name);
/* poll the per-cpu trace fd until data is readable or timeout (ms) expires */
440 static void wait_for_data(struct thread_information *tip, int timeout)
442 struct pollfd pfd = { .fd = tip->fd, .events = POLLIN };
445 if (poll(&pfd, 1, timeout) < 0) {
449 if (pfd.revents & POLLIN)
451 if (tip->ofile_stdout)
/* read up to len bytes from the local debugfs trace file; retries on
 * EAGAIN (fd is non-blocking) until data arrives or the run is done */
456 static int read_data_file(struct thread_information *tip, void *buf,
462 wait_for_data(tip, 100);
464 ret = read(tip->fd, buf, len);
470 if (errno != EAGAIN) {
472 fprintf(stderr,"Thread %d failed read of %s\n",
478 } while (!is_done());
/* server-mode counterpart: recv() trace data from the client socket,
 * looping until len bytes arrive or the run ends; returns bytes received */
484 static int read_data_net(struct thread_information *tip, void *buf,
487 struct net_connection *nc = tip->nc;
488 unsigned int bytes_left = len;
492 ret = recv(nc->in_fd, buf, bytes_left, MSG_WAITALL);
497 if (errno != EAGAIN) {
499 fprintf(stderr, "server: failed read\n");
507 } while (!is_done() && bytes_left);
509 return len - bytes_left;
/* pop the oldest queued subbuffer, or NULL when the ring is empty;
 * indices wrap with a power-of-two mask (FIFO_SIZE must stay a power of 2) */
512 static inline struct tip_subbuf *
513 subbuf_fifo_dequeue(struct thread_information *tip)
515 const int head = tip->fifo.head;
516 const int next = (head + 1) & (FIFO_SIZE - 1);
518 if (head != tip->fifo.tail) {
519 struct tip_subbuf *ts = tip->fifo.q[head];
522 tip->fifo.head = next;
/* push a filled subbuffer; fails (with a warning) if the ring is full —
 * one slot is sacrificed to distinguish full from empty */
529 static inline int subbuf_fifo_queue(struct thread_information *tip,
530 struct tip_subbuf *ts)
532 const int tail = tip->fifo.tail;
533 const int next = (tail + 1) & (FIFO_SIZE - 1);
535 if (next != tip->fifo.head) {
536 tip->fifo.q[tail] = ts;
538 tip->fifo.tail = next;
542 fprintf(stderr, "fifo too small!\n");
547 * For file output, truncate and mmap the file appropriately
/* read up to maxlen bytes of trace data straight into an mmap'd window of
 * the output file, growing the file (ftruncate + remap) in multi-subbuffer
 * chunks as needed; avoids a copy compared to the pipe/net paths */
549 static int mmap_subbuf(struct thread_information *tip, unsigned int maxlen)
551 int ofd = fileno(tip->ofile);
556 * extend file, if we have to. use chunks of 16 subbuffers.
558 if (tip->fs_off + maxlen > tip->fs_buf_len) {
/* drop the old window before mapping the next one */
560 munlock(tip->fs_buf, tip->fs_buf_len);
561 munmap(tip->fs_buf, tip->fs_buf_len);
/* keep the mapping page-aligned: carry the sub-page remainder in fs_off */
565 tip->fs_off = tip->fs_size & (tip->device->page_size - 1);
566 nr = max(16, tip->device->buf_nr);
567 tip->fs_buf_len = (nr * tip->device->buf_size) - tip->fs_off;
568 tip->fs_max_size += tip->fs_buf_len;
570 if (ftruncate(ofd, tip->fs_max_size) < 0) {
575 tip->fs_buf = mmap(NULL, tip->fs_buf_len, PROT_WRITE,
576 MAP_SHARED, ofd, tip->fs_size - tip->fs_off);
577 if (tip->fs_buf == MAP_FAILED) {
/* best-effort: keep the window resident; failure is ignored */
581 mlock(tip->fs_buf, tip->fs_buf_len);
584 ret = tip->read_data(tip, tip->fs_buf + tip->fs_off, maxlen);
586 tip->data_read += ret;
596 * Use the copy approach for pipes and network
/* allocate a tip_subbuf, fill it from read_data, and queue it on the fifo
 * for the writer side; used when output is a pipe or the network */
598 static int get_subbuf(struct thread_information *tip, unsigned int maxlen)
600 struct tip_subbuf *ts = malloc(sizeof(*ts));
603 ts->buf = malloc(tip->device->buf_size);
604 ts->max_len = maxlen;
606 ret = tip->read_data(tip, ts->buf, ts->max_len);
609 tip->data_read += ret;
610 if (subbuf_fifo_queue(tip, ts))
/* release a thread's output resources (stdio buffer etc.) */
622 static void close_thread(struct thread_information *tip)
628 if (tip->ofile_buffer)
629 free(tip->ofile_buffer)
635 tip->ofile_buffer = NULL;
/* mmap output: unmap the window and shrink the file back from its
 * pre-extended fs_max_size to the bytes actually written (fs_size) */
639 static void tip_ftrunc_final(struct thread_information *tip)
642 * truncate to right size and cleanup mmap
644 if (tip->ofile_mmap && tip->ofile) {
645 int ofd = fileno(tip->ofile);
648 munmap(tip->fs_buf, tip->fs_buf_len);
650 ftruncate(ofd, tip->fs_size);
/* per-cpu reader thread: pin to its cpu, open the matching debugfs
 * trace<cpu> file, then pull subbuffers until the run is done; after the
 * trace stops, drain remaining data until a short read */
654 static void *thread_main(void *arg)
656 struct thread_information *tip = arg;
657 pid_t pid = getpid();
661 CPU_SET((tip->cpu), &cpu_mask);
663 if (sched_setaffinity(pid, sizeof(cpu_mask), &cpu_mask) == -1) {
664 perror("sched_setaffinity");
668 snprintf(tip->fn, sizeof(tip->fn), "%s/block/%s/trace%d",
669 debugfs_path, tip->device->buts_name, tip->cpu);
670 tip->fd = open(tip->fn, O_RDONLY);
673 fprintf(stderr,"Thread %d failed open of %s\n", tip->cpu,
679 if (tip->get_subbuf(tip, tip->device->buf_size) < 0)
684 * trace is stopped, pull data until we get a short read
686 while (tip->get_subbuf(tip, tip->device->buf_size) > 0)
/* finalize mmap'd output before the thread exits */
689 tip_ftrunc_final(tip);
/* send() a buffer fully over fd, looping on partial sends */
694 static int write_data_net(int fd, void *buf, unsigned int buf_len)
696 unsigned int bytes_left = buf_len;
700 ret = send(fd, buf, bytes_left, 0);
/* client mode: send a blktrace_net_hdr announcing len bytes of trace data
 * for this cpu/device on the per-cpu outgoing socket */
713 static int net_send_header(struct thread_information *tip, unsigned int len)
715 struct blktrace_net_hdr hdr;
717 hdr.magic = BLK_IO_TRACE_MAGIC;
718 strcpy(hdr.buts_name, tip->device->buts_name);
720 hdr.max_cpus = ncpus;
/* cl_id groups all of this client's per-cpu connections on the server */
722 hdr.cl_id = getpid();
723 hdr.buf_size = tip->device->buf_size;
724 hdr.buf_nr = tip->device->buf_nr;
725 hdr.page_size = tip->device->page_size;
727 return write_data_net(net_out_fd[tip->cpu], &hdr, sizeof(hdr));
731 * send header with 0 length to signal end-of-run
733 static void net_client_send_close(void)
735 struct device_information *dip;
736 struct blktrace_net_hdr hdr;
739 for_each_dip(dip, i) {
740 hdr.magic = BLK_IO_TRACE_MAGIC;
741 hdr.max_cpus = ncpus;
/* the cpu field is overloaded to carry the dropped-event count here;
 * net_client_data on the server reads it back the same way */
743 strcpy(hdr.buts_name, dip->buts_name);
744 hdr.cpu = get_dropped_count(dip->buts_name);
745 hdr.cl_id = getpid();
746 hdr.buf_size = dip->buf_size;
747 hdr.buf_nr = dip->buf_nr;
748 hdr.page_size = dip->page_size;
750 write_data_net(net_out_fd[0], &hdr, sizeof(hdr));
/* copy path: header, then the buffered subbuffer contents */
755 static int flush_subbuf_net(struct thread_information *tip,
756 struct tip_subbuf *ts)
758 if (net_send_header(tip, ts->len))
760 if (write_data_net(net_out_fd[tip->cpu], ts->buf, ts->len))
/* zero-copy path: splice ts->len bytes from the trace fd to the socket */
768 static int net_sendfile(struct thread_information *tip, struct tip_subbuf *ts)
770 int ret = sendfile(net_out_fd[tip->cpu], tip->fd, NULL, ts->len);
775 } else if (ret < (int) ts->len) {
776 fprintf(stderr, "short sendfile send (%d of %d)\n", ret, ts->len);
783 static int flush_subbuf_sendfile(struct thread_information *tip,
784 struct tip_subbuf *ts)
788 if (net_send_header(tip, ts->len))
790 if (net_sendfile(tip, ts))
793 tip->data_read += ts->len;
/* sendfile variant of get_subbuf: wait for data, then use fstat() on the
 * trace fd to learn how many new bytes are ready (st_size minus what was
 * already queued) and flush them straight to the socket */
800 static int get_subbuf_sendfile(struct thread_information *tip,
801 __attribute__((__unused__)) unsigned int maxlen)
803 struct tip_subbuf *ts;
807 wait_for_data(tip, -1);
809 if (fstat(tip->fd, &sb) < 0) {
810 perror("trace stat");
814 ready = sb.st_size - tip->data_queued;
820 ts = malloc(sizeof(*ts));
824 tip->data_queued += ready;
826 if (flush_subbuf_sendfile(tip, ts) < 0)
/* write buf_len bytes to the thread's stdio output file; clears the error
 * indicator after reporting a failed write */
832 static int write_data(struct thread_information *tip, void *buf,
833 unsigned int buf_len)
840 ret = fwrite(buf, buf_len, 1, tip->ofile);
841 if (ferror(tip->ofile) || ret != 1) {
843 clearerr(tip->ofile);
847 if (tip->ofile_stdout)
/* pipe-output flush: walk the subbuffer event by event (verify_trace on
 * each blk_io_trace record), write out only whole events, and carry any
 * trailing partial event over to the next subbuffer via leftover_ts */
853 static int flush_subbuf_file(struct thread_information *tip,
854 struct tip_subbuf *ts)
856 unsigned int offset = 0;
857 struct blk_io_trace *t;
858 int pdu_len, events = 0;
861 * surplus from last run
863 if (tip->leftover_ts) {
864 struct tip_subbuf *prev_ts = tip->leftover_ts;
/* grow the saved buffer if the combined data would not fit */
866 if (prev_ts->len + ts->len > prev_ts->max_len) {
867 prev_ts->max_len += ts->len;
868 prev_ts->buf = realloc(prev_ts->buf, prev_ts->max_len);
871 memcpy(prev_ts->buf + prev_ts->len, ts->buf, ts->len);
872 prev_ts->len += ts->len;
878 tip->leftover_ts = NULL;
/* scan complete fixed-size headers; stop at a truncated trailing event */
881 while (offset + sizeof(*t) <= ts->len) {
882 t = ts->buf + offset;
884 if (verify_trace(t)) {
885 write_data(tip, ts->buf, offset);
889 pdu_len = t->pdu_len;
891 if (offset + sizeof(*t) + pdu_len > ts->len)
894 offset += sizeof(*t) + pdu_len;
895 tip->events_processed++;
896 tip->data_read += sizeof(*t) + pdu_len;
900 if (write_data(tip, ts->buf, offset))
904 * leftover bytes, save them for next time
906 if (offset != ts->len) {
907 tip->leftover_ts = ts;
/* NOTE(review): visible call moves ts->len bytes from offset — looks
 * like the length argument should be the remainder; interior lines are
 * missing here, so confirm against the full source */
909 memmove(ts->buf, ts->buf + offset, ts->len);
/* writer side: dequeue one subbuffer and flush it via the mode's handler */
918 static int write_tip_events(struct thread_information *tip)
920 struct tip_subbuf *ts = subbuf_fifo_dequeue(tip);
923 return tip->flush_subbuf(tip, ts);
929 * scans the tips we know and writes out the subbuffers we accumulate
/* main-thread writer loop for pipe/net-copy output: repeatedly drain every
 * thread's fifo; after the run ends, keep flushing until no events remain
 * and all reader threads have exited */
931 static void get_and_write_events(void)
933 struct device_information *dip;
934 struct thread_information *tip;
935 int i, j, events, ret, tips_running;
940 for_each_dip(dip, i) {
941 for_each_tip(dip, tip, j) {
942 ret = write_tip_events(tip);
/* drain pass: count events flushed and threads still alive */
958 for_each_dip(dip, i) {
959 for_each_tip(dip, tip, j) {
960 ret = write_tip_events(tip);
963 tips_running += !tip->exited;
967 } while (events || tips_running);
970 static void wait_for_threads(void)
973 * for piped or network output, poll and fetch data for writeout.
974 * for files, we just wait around for trace threads to exit
976 if ((output_name && !strcmp(output_name, "-")) ||
977 ((net_mode == Net_client) && !net_use_sendfile))
978 get_and_write_events();
980 struct device_information *dip;
981 struct thread_information *tip;
982 int i, j, tips_running;
989 for_each_tip(dip, tip, j)
990 tips_running += !tip->exited;
991 } while (tips_running);
/* tell the server we are finished once all data is out */
994 if (net_mode == Net_client)
995 net_client_send_close();
/* build the per-cpu output file path in dst: [output_dir|.]/[client-addr-
 * timestamp/ in server mode][output_name|buts_name].blktrace.<cpu>;
 * creates the server-mode per-client directory if it does not exist */
998 static int fill_ofname(struct device_information *dip,
999 struct thread_information *tip, char *dst,
1006 len = sprintf(dst, "%s/", output_dir);
1008 len = sprintf(dst, "./");
1010 if (net_mode == Net_server) {
1011 struct net_connection *nc = tip->nc;
1013 len += sprintf(dst + len, "%s-", inet_ntoa(nc->ch->cl_in_addr));
1014 len += strftime(dst + len, 64, "%F-%T/", gmtime(&dip->cl_connect_time));
1017 if (stat(dst, &sb) < 0) {
1018 if (errno != ENOENT) {
1022 if (mkdir(dst, 0755) < 0) {
1024 fprintf(stderr, "Can't make output dir\n");
1030 sprintf(dst + len, "%s.blktrace.%d", output_name, tip->cpu);
1032 sprintf(dst + len, "%s.blktrace.%d", buts_name, tip->cpu);
/* select the get/flush/read function pointers for the current run mode:
 * client (sendfile or copy), local file (mmap), or pipe; server threads
 * read from the network instead of debugfs */
1037 static void fill_ops(struct thread_information *tip)
1042 if (net_mode == Net_client) {
1043 if (net_use_sendfile) {
1044 tip->get_subbuf = get_subbuf_sendfile;
1045 tip->flush_subbuf = NULL;
1047 tip->get_subbuf = get_subbuf;
1048 tip->flush_subbuf = flush_subbuf_net;
1051 if (tip->ofile_mmap)
1052 tip->get_subbuf = mmap_subbuf;
1054 tip->get_subbuf = get_subbuf;
1056 tip->flush_subbuf = flush_subbuf_file;
1059 if (net_mode == Net_server)
1060 tip->read_data = read_data_net;
1062 tip->read_data = read_data_file;
/* open the thread's output destination — nothing for a net client, stdout
 * for "-", otherwise a regular file (mmap mode) — and set up stdio
 * buffering for it */
1065 static int tip_open_output(struct device_information *dip,
1066 struct thread_information *tip)
1068 int pipeline = output_name && !strcmp(output_name, "-");
1069 int mode, vbuf_size;
1072 if (net_mode == Net_client) {
/* data leaves via the socket; no local file at all */
1074 tip->ofile_stdout = 0;
1075 tip->ofile_mmap = 0;
1077 } else if (pipeline) {
1078 tip->ofile = fdopen(STDOUT_FILENO, "w");
1079 tip->ofile_stdout = 1;
1080 tip->ofile_mmap = 0;
1084 if (fill_ofname(dip, tip, op, dip->buts_name))
/* w+ because mmap_subbuf maps the file PROT_WRITE via its fd */
1086 tip->ofile = fopen(op, "w+");
1087 tip->ofile_stdout = 0;
1088 tip->ofile_mmap = 1;
1090 vbuf_size = OFILE_BUF;
1093 if (tip->ofile == NULL) {
1098 tip->ofile_buffer = malloc(vbuf_size);
1099 if (setvbuf(tip->ofile, tip->ofile_buffer, mode, vbuf_size)) {
/* reset per-thread state, open its output, and spawn one reader thread
 * per cpu for this device */
1110 static int start_threads(struct device_information *dip)
1112 struct thread_information *tip;
1115 for_each_tip(dip, tip, j) {
1118 tip->events_processed = 0;
1120 memset(&tip->fifo, 0, sizeof(tip->fifo));
1121 tip->leftover_ts = NULL;
1123 if (tip_open_output(dip, tip))
1126 if (pthread_create(&tip->thread, NULL, thread_main, tip)) {
1127 perror("pthread_create");
/* join every reader thread of one device */
1136 static void stop_threads(struct device_information *dip)
1138 struct thread_information *tip;
1142 for_each_tip(dip, tip, i) {
1143 (void) pthread_join(tip->thread, (void *) &ret);
1148 static void stop_all_threads(void)
1150 struct device_information *dip;
1153 for_each_dip(dip, i)
/* atexit handler (registered in main): tear down kernel tracing */
1157 static void stop_all_tracing(void)
1159 struct device_information *dip;
1162 for_each_dip(dip, i)
1166 static void exit_trace(int status)
1168 if (!is_trace_stopped()) {
/* grow device_information by one entry for a newly named device path;
 * note the -d/trailing-argument optarg pointer is stored, not copied */
1177 static int resize_devices(char *path)
1179 int size = (ndevs + 1) * sizeof(struct device_information);
1181 device_information = realloc(device_information, size);
1182 if (!device_information) {
1183 fprintf(stderr, "Out of memory, device %s (%d)\n", path, size);
1186 device_information[ndevs].path = path;
/* open every named device non-blocking read-only and stamp it with the
 * globally configured buffer geometry */
1191 static int open_devices(void)
1193 struct device_information *dip;
1196 for_each_dip(dip, i) {
1197 dip->fd = open(dip->path, O_RDONLY | O_NONBLOCK);
1202 dip->buf_size = buf_size;
1203 dip->buf_nr = buf_nr;
1204 dip->page_size = page_size;
/* allocate the ndevs*ncpus thread array, start kernel tracing on each
 * device, then launch reader threads; on any failure, unwind whatever was
 * already started (the __for_each_dip loops over the first i entries) */
1210 static int start_devices(void)
1212 struct device_information *dip;
1215 size = ncpus * sizeof(struct thread_information);
1216 thread_information = malloc(size * ndevs);
1217 if (!thread_information) {
1218 fprintf(stderr, "Out of memory, threads (%d)\n", size * ndevs);
1222 for_each_dip(dip, i) {
1223 if (start_trace(dip)) {
1225 fprintf(stderr, "Failed to start trace on %s\n",
1232 __for_each_dip(dip, device_information, i, j)
1238 for_each_dip(dip, i) {
1239 dip->threads = thread_information + (i * ncpus);
1240 if (start_threads(dip)) {
1241 fprintf(stderr, "Failed to start worker threads\n");
1247 __for_each_dip(dip, device_information, i, j)
1249 for_each_dip(dip, i)
/* print per-cpu and per-device event/byte totals plus dropped counts;
 * suppresses the per-cpu stdout lines when output went to stdout ("-") */
1258 static void show_stats(struct device_information *dips, int ndips, int cpus)
1260 struct device_information *dip;
1261 struct thread_information *tip;
1262 unsigned long long events_processed, data_read;
1263 unsigned long total_drops;
1264 int i, j, no_stdout = 0;
/* guard against printing twice (stat_shown flag) */
1266 if (is_stat_shown())
1269 if (output_name && !strcmp(output_name, "-"))
1275 __for_each_dip(dip, dips, ndips, i) {
1277 printf("Device: %s\n", dip->path);
1278 events_processed = 0;
1280 __for_each_tip(dip, tip, cpus, j) {
1282 printf("  CPU%3d: %20lu events, %8llu KiB data\n",
1283 tip->cpu, tip->events_processed,
/* round bytes up to whole KiB */
1284 (tip->data_read + 1023) >> 10);
1285 events_processed += tip->events_processed;
1286 data_read += tip->data_read;
1288 total_drops += dip->drop_count;
1290 printf("  Total:  %20llu events (dropped %lu), %8llu KiB data\n",
1291 events_processed, dip->drop_count,
1292 (data_read + 1023) >> 10);
1296 fprintf(stderr, "You have dropped events, consider using a larger buffer size (-b)\n");
/* server mode: find the client-host's device matching this header's
 * buts_name (and cl_id), or create a fresh device entry — including its
 * per-cpu thread array and output files — for a new remote trace */
1299 static struct device_information *net_get_dip(struct net_connection *nc,
1300 struct blktrace_net_hdr *bnh)
1302 struct device_information *dip, *cl_dip = NULL;
1303 struct cl_host *ch = nc->ch;
1306 for (i = 0; i < ch->ndevs; i++) {
1307 dip = &ch->device_information[i];
1309 if (!strcmp(dip->buts_name, bnh->buts_name))
1312 if (dip->cl_id == bnh->cl_id)
/* not found: extend the per-host device array */
1316 ch->device_information = realloc(ch->device_information, (ch->ndevs + 1) * sizeof(*dip));
1317 dip = &ch->device_information[ch->ndevs];
1318 memset(dip, 0, sizeof(*dip));
/* adopt the client's buffer geometry from the header */
1321 dip->cl_id = bnh->cl_id;
1322 dip->buf_size = bnh->buf_size;
1323 dip->buf_nr = bnh->buf_nr;
1324 dip->page_size = bnh->page_size;
/* share the connect timestamp with a sibling device of the same client
 * so all its output lands in one timestamped directory */
1327 dip->cl_connect_time = cl_dip->cl_connect_time;
1329 dip->cl_connect_time = nc->connect_time;
1330 strcpy(dip->buts_name, bnh->buts_name);
1331 dip->path = strdup(bnh->buts_name);
1332 dip->trace_started = 1;
1334 dip->threads = malloc(nc->ncpus * sizeof(struct thread_information));
1335 memset(dip->threads, 0, nc->ncpus * sizeof(struct thread_information));
1340 for (i = 0; i < nc->ncpus; i++) {
1341 struct thread_information *tip = &dip->threads[i];
1348 if (tip_open_output(dip, tip))
/* resolve the header's device and cpu to a thread_information entry;
 * complains if events arrive for an already-closed device */
1357 static struct thread_information *net_get_tip(struct net_connection *nc,
1358 struct blktrace_net_hdr *bnh)
1360 struct device_information *dip;
1361 struct thread_information *tip;
1363 dip = net_get_dip(nc, bnh);
1364 if (!dip->trace_started) {
1365 fprintf(stderr, "Events for closed devices %s\n", dip->buts_name);
1369 tip = &dip->threads[bnh->cpu];
/* receive one blktrace_net_hdr; the socket is flipped to non-blocking for
 * the duration so a dead client cannot stall the poll loop, then restored */
1376 static int net_get_header(struct net_connection *nc,
1377 struct blktrace_net_hdr *bnh)
1379 int fl = fcntl(nc->in_fd, F_GETFL);
1380 int bytes_left, ret;
1383 fcntl(nc->in_fd, F_SETFL, fl | O_NONBLOCK);
1384 bytes_left = sizeof(*bnh);
1385 while (bytes_left && !is_done()) {
1386 ret = recv(nc->in_fd, p, bytes_left, MSG_WAITALL);
1388 if (errno != EAGAIN) {
1389 perror("recv header");
/* restore the original blocking mode */
1402 fcntl(nc->in_fd, F_SETFL, fl & ~O_NONBLOCK);
1407 * finalize a net client: truncate files, show stats, cleanup, etc
1409 static void device_done(struct net_connection *nc, struct device_information *dip)
1411 struct thread_information *tip;
1414 __for_each_tip(dip, tip, nc->ncpus, i)
1415 tip_ftrunc_final(tip);
/* one device at a time: stats for just this dip over the client's cpus */
1417 show_stats(dip, 1, nc->ncpus);
1420 * cleanup for next run
1422 __for_each_tip(dip, tip, nc->ncpus, i) {
/* IPv4 address equality */
1436 static inline int in_addr_eq(struct in_addr a, struct in_addr b)
1438 return a.s_addr == b.s_addr;
/* push a client host onto the global singly-linked list */
1441 static void net_add_client_host(struct cl_host *ch)
1443 ch->list_next = cl_host_list;
/* unlink ch from cl_host_list (p trails c through the walk) */
1448 static void net_remove_client_host(struct cl_host *ch)
1450 struct cl_host *p, *c;
1452 for (p = c = cl_host_list; c; c = c->list_next) {
1455 cl_host_list = c->list_next;
1457 p->list_next = c->list_next;
/* look up a connected client host by source address, NULL if unknown */
1465 static struct cl_host *net_find_client_host(struct in_addr cl_in_addr)
1467 struct cl_host *ch = cl_host_list;
1470 if (in_addr_eq(ch->cl_in_addr, cl_in_addr))
/* all of a client's devices are done: free its state and drop it from
 * the list and the global connection count */
1478 static void net_client_host_done(struct cl_host *ch)
1480 free(ch->device_information);
1481 free(ch->net_connections);
1482 net_connects -= ch->nconn;
1483 net_remove_client_host(ch);
1488 * handle incoming events from a net client
/* read one header, fix endianness if the client's byte order differs,
 * then either finish the device (len == 0 end-of-run marker) or pull the
 * announced payload into the matching thread's mmap'd output file */
1490 static int net_client_data(struct net_connection *nc)
1492 struct thread_information *tip;
1493 struct blktrace_net_hdr bnh;
1495 if (net_get_header(nc, &bnh))
/* first header decides endianness for the whole session */
1498 if (data_is_native == -1 && check_data_endianness(bnh.magic)) {
1499 fprintf(stderr, "server: received data is bad\n");
1503 if (!data_is_native) {
1504 bnh.magic = be32_to_cpu(bnh.magic);
1505 bnh.cpu = be32_to_cpu(bnh.cpu);
1506 bnh.max_cpus = be32_to_cpu(bnh.max_cpus);
1507 bnh.len = be32_to_cpu(bnh.len);
1508 bnh.cl_id = be32_to_cpu(bnh.cl_id);
1509 bnh.buf_size = be32_to_cpu(bnh.buf_size);
1510 bnh.buf_nr = be32_to_cpu(bnh.buf_nr);
1511 bnh.page_size = be32_to_cpu(bnh.page_size);
/* low byte of the magic carries a version; mask it off for the check */
1514 if ((bnh.magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) {
1515 fprintf(stderr, "server: bad data magic\n");
1519 if (nc->ncpus == -1)
1520 nc->ncpus = bnh.max_cpus;
1523 * len == 0 means that the other end signalled end-of-run
1527 * overload cpu count with dropped events
1529 struct device_information *dip;
1531 dip = net_get_dip(nc, &bnh);
1532 dip->drop_count = bnh.cpu;
1533 dip->trace_started = 0;
1535 printf("server: end of run for %s\n", dip->buts_name);
1537 device_done(nc, dip);
/* once every device of this host is done, release the host */
1539 if (++nc->ch->ndevs_done == nc->ch->ndevs)
1540 net_client_host_done(nc->ch);
1545 tip = net_get_tip(nc, &bnh);
/* reuse the local-file mmap writer; read_data is read_data_net here */
1549 if (mmap_subbuf(tip, bnh.len))
/* accept a new client socket; create a cl_host on first contact from an
 * address (bounded by NET_MAX_CL_HOSTS) and append a net_connection,
 * recording the connect time used for the output directory name */
1555 static void net_add_connection(int listen_fd, struct sockaddr_in *addr)
1557 socklen_t socklen = sizeof(*addr);
1558 struct net_connection *nc;
1562 in_fd = accept(listen_fd, (struct sockaddr *) addr, &socklen);
1568 ch = net_find_client_host(addr->sin_addr);
1570 if (cl_hosts == NET_MAX_CL_HOSTS) {
1571 fprintf(stderr, "server: no more clients allowed\n");
1574 ch = malloc(sizeof(struct cl_host));
1575 memset(ch, 0, sizeof(*ch));
1576 ch->cl_in_addr = addr->sin_addr;
1577 net_add_client_host(ch);
1579 printf("server: connection from %s\n", inet_ntoa(addr->sin_addr));
1582 ch->net_connections = realloc(ch->net_connections, (ch->nconn + 1) * sizeof(*nc));
1583 nc = &ch->net_connections[ch->nconn++];
1584 memset(nc, 0, sizeof(*nc));
1586 time(&nc->connect_time);
1594 * event driven loop, handle new incoming connections and data from
1595 * existing connections
/* server main loop: poll the listen socket plus one pollfd per client
 * connection, growing the pollfd/connection arrays as clients arrive */
1597 static void net_server_handle_connections(int listen_fd,
1598 struct sockaddr_in *addr)
1600 struct pollfd *pfds = NULL;
1601 struct net_connection **ncs = NULL;
1602 int max_connects = 0;
1603 int i, nconns, events;
1605 struct net_connection *nc;
1607 printf("server: waiting for connections...\n");
1609 while (!is_done()) {
1610 if (net_connects >= max_connects) {
1611 pfds = realloc(pfds, (net_connects + 1) * sizeof(*pfds));
1612 ncs = realloc(ncs, (net_connects + 1) * sizeof(*ncs));
1613 max_connects = net_connects + 1;
1616 * the zero entry is for incoming connections, remaining
1617 * entries for clients
1619 pfds[0].fd = listen_fd;
1620 pfds[0].events = POLLIN;
/* rebuild the pollfd set from the current connection list each pass */
1622 for_each_cl_host(ch) {
1623 for (i = 0; i < ch->nconn; i++) {
1624 nc = &ch->net_connections[i];
1625 pfds[nconns + 1].fd = nc->in_fd;
1626 pfds[nconns + 1].events = POLLIN;
1631 events = poll(pfds, 1 + nconns, -1);
1641 if (pfds[0].revents & POLLIN) {
1642 net_add_connection(listen_fd, addr);
1646 for (i = 0; events && i < nconns; i++) {
1647 if (pfds[i + 1].revents & POLLIN) {
1648 net_client_data(ncs[i]);
1656 * Start here when we are in server mode - just fetch data from the network
/* create/bind/listen a TCP socket on net_port (all interfaces) with
 * SO_REUSEADDR, then hand off to the connection-handling loop */
1659 static int net_server(void)
1661 struct sockaddr_in addr;
1664 fd = socket(AF_INET, SOCK_STREAM, 0);
1666 perror("server: socket");
1671 if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) < 0) {
1672 perror("setsockopt");
1676 memset(&addr, 0, sizeof(addr));
1677 addr.sin_family = AF_INET;
1678 addr.sin_addr.s_addr = htonl(INADDR_ANY);
1679 addr.sin_port = htons(net_port);
1681 if (bind(fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
1686 if (listen(fd, 1) < 0) {
1691 net_server_handle_connections(fd, &addr);
1696 * Setup outgoing network connection where we will transmit data
/* open and connect one outgoing TCP socket (stored per-cpu in net_out_fd) */
1698 static int net_setup_client_cpu(int i, struct sockaddr_in *addr)
1702 fd = socket(AF_INET, SOCK_STREAM, 0);
1704 perror("client: socket");
1708 if (connect(fd, (struct sockaddr *) addr, sizeof(*addr)) < 0) {
1709 perror("client: connect");
/* resolve -h hostname (dotted quad first, then gethostbyname) and open
 * one connection per cpu to the server */
1717 static int net_setup_client(void)
1719 struct sockaddr_in addr;
1722 memset(&addr, 0, sizeof(addr));
1723 addr.sin_family = AF_INET;
1724 addr.sin_port = htons(net_port);
1726 if (inet_aton(hostname, &addr.sin_addr) != 1) {
1727 struct hostent *hent = gethostbyname(hostname);
1729 perror("gethostbyname");
/* IPv4 only: copy the 4-byte address and canonical name back */
1733 memcpy(&addr.sin_addr, hent->h_addr, 4);
1734 strcpy(hostname, hent->h_name);
1737 printf("blktrace: connecting to %s\n", hostname);
1739 net_out_fd = malloc(ncpus * sizeof(*net_out_fd));
1740 for (i = 0; i < ncpus; i++) {
1741 if (net_setup_client_cpu(i, &addr))
1745 printf("blktrace: connected!\n");
/* usage text printed by show_usage(); option letters mirror S_OPTS */
1750 static char usage_str[] = \
1751 "-d <dev> [ -r debugfs path ] [ -o <output> ] [-k ] [ -w time ]\n" \
1752 "[ -a action ] [ -A action mask ] [ -v ]\n\n" \
1753 "\t-d Use specified device. May also be given last after options\n" \
1754 "\t-r Path to mounted debugfs, defaults to /debug\n" \
1755 "\t-o File(s) to send output to\n" \
1756 "\t-D Directory to prepend to output file names\n" \
1757 "\t-k Kill a running trace\n" \
1758 "\t-w Stop after defined time, in seconds\n" \
1759 "\t-a Only trace specified actions. See documentation\n" \
1760 "\t-A Give trace mask as a single value. See documentation\n" \
1761 "\t-b Sub buffer size in KiB\n" \
1762 "\t-n Number of sub buffers\n" \
1763 "\t-l Run in network listen mode (blktrace server)\n" \
1764 "\t-h Run in network client mode, connecting to the given host\n" \
1765 "\t-p Network port to use (default 8462)\n" \
1766 "\t-s Make the network client NOT use sendfile() to transfer data\n" \
1767 "\t-V Print program version info\n\n";
1769 static void show_usage(char *program)
1771 fprintf(stderr, "Usage: %s %s %s",program, blktrace_version, usage_str);
/* entry point: parse options, dispatch to server mode or set up devices,
 * validate the debugfs mount, install signal handlers, start tracing, and
 * print stats on the way out */
1774 int main(int argc, char *argv[])
1776 static char default_debugfs_path[] = "/sys/kernel/debug";
1780 int act_mask_tmp = 0;
1782 while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) >= 0) {
/* -a: accumulate named action masks */
1785 i = find_mask_map(optarg);
1787 fprintf(stderr,"Invalid action mask %s\n",
/* -A: whole mask as one hex value */
1795 if ((sscanf(optarg, "%x", &i) != 1) ||
1796 !valid_act_opt(i)) {
1798 "Invalid set action mask %s/0x%x\n",
1806 if (resize_devices(optarg) != 0)
1811 debugfs_path = optarg;
1815 output_name = optarg;
1818 kill_running_trace = 1;
/* -w: alarm-based run duration */
1821 stop_watch = atoi(optarg);
1822 if (stop_watch <= 0) {
1824 "Invalid stopwatch value (%d secs)\n",
1830 printf("%s version %s\n", argv[0], blktrace_version);
/* -b: sub-buffer size in KiB, capped at 16 MiB */
1833 buf_size = strtoul(optarg, NULL, 10);
1834 if (buf_size <= 0 || buf_size > 16*1024) {
1836 "Invalid buffer size (%lu)\n",buf_size);
1842 buf_nr = strtoul(optarg, NULL, 10);
1845 "Invalid buffer nr (%lu)\n", buf_nr);
1850 output_dir = optarg;
1853 net_mode = Net_client;
1854 strcpy(hostname, optarg);
1857 net_mode = Net_server;
1860 net_port = atoi(optarg);
1863 net_use_sendfile = 0;
1866 show_usage(argv[0]);
1871 setlocale(LC_NUMERIC, "en_US");
1873 page_size = getpagesize();
/* server mode never touches local devices; hand off immediately */
1875 if (net_mode == Net_server) {
1877 fprintf(stderr, "-o ignored in server mode\n");
1881 return net_server();
/* remaining positional args are device paths (alternative to -d) */
1884 while (optind < argc) {
1885 if (resize_devices(argv[optind++]) != 0)
1890 show_usage(argv[0]);
1894 if (act_mask_tmp != 0)
1895 act_mask = act_mask_tmp;
1898 debugfs_path = default_debugfs_path;
/* verify the -r path exists and really is debugfs (magic f_type) */
1900 if (statfs(debugfs_path, &st) < 0) {
1902 fprintf(stderr,"%s does not appear to be a valid path\n",
1905 } else if (st.f_type != (long) DEBUGFS_TYPE) {
1906 fprintf(stderr,"%s does not appear to be a debug filesystem\n",
1911 if (open_devices() != 0)
1914 if (kill_running_trace) {
1919 ncpus = sysconf(_SC_NPROCESSORS_ONLN);
1921 fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed\n");
/* all termination signals funnel through handle_sigint; SIGPIPE is
 * ignored so a dead pipe/socket surfaces as a write error instead */
1925 signal(SIGINT, handle_sigint);
1926 signal(SIGHUP, handle_sigint);
1927 signal(SIGTERM, handle_sigint);
1928 signal(SIGALRM, handle_sigint);
1929 signal(SIGPIPE, SIG_IGN);
1931 if (net_mode == Net_client && net_setup_client())
1934 if (start_devices() != 0)
/* guarantee kernel-side teardown on any exit path */
1937 atexit(stop_all_tracing);
1944 if (!is_trace_stopped()) {
1950 show_stats(device_information, ndevs, ncpus);