2 * block queue tracing application
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5 * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 #include <sys/types.h>
30 #include <sys/ioctl.h>
31 #include <sys/param.h>
32 #include <sys/statfs.h>
35 #include <sys/socket.h>
42 #include <netinet/in.h>
43 #include <arpa/inet.h>
45 #include <sys/sendfile.h>
50 static char blktrace_version[] = "1.0.0";
53 * You may want to increase this even more, if you are logging at a high
54 * rate and see skipped/missed events
56 #define BUF_SIZE (512 * 1024)
59 #define OFILE_BUF (128 * 1024)
61 #define DEBUGFS_TYPE 0x64626720
63 #define S_OPTS "d:a:A:r:o:kw:vVb:n:D:lh:p:sI:"
64 static struct option l_opts[] = {
67 .has_arg = required_argument,
73 .has_arg = required_argument,
79 .has_arg = required_argument,
85 .has_arg = required_argument,
91 .has_arg = required_argument,
97 .has_arg = required_argument,
103 .has_arg = no_argument,
109 .has_arg = required_argument,
115 .has_arg = no_argument,
121 .has_arg = no_argument,
126 .name = "buffer-size",
127 .has_arg = required_argument,
132 .name = "num-sub-buffers",
133 .has_arg = required_argument,
138 .name = "output-dir",
139 .has_arg = required_argument,
145 .has_arg = no_argument,
151 .has_arg = required_argument,
157 .has_arg = required_argument,
162 .name = "no-sendfile",
163 .has_arg = no_argument,
175 unsigned int max_len;
178 #define FIFO_SIZE (1024) /* should be plenty big! */
179 #define CL_SIZE (128) /* cache line, any bigger? */
181 struct tip_subbuf_fifo {
182 int tail __attribute__((aligned(CL_SIZE)));
183 int head __attribute__((aligned(CL_SIZE)));
184 struct tip_subbuf *q[FIFO_SIZE];
187 struct thread_information {
193 char fn[MAXPATHLEN + 64];
201 int (*get_subbuf)(struct thread_information *, unsigned int);
202 int (*flush_subbuf)(struct thread_information *, struct tip_subbuf *);
203 int (*read_data)(struct thread_information *, void *, unsigned int);
205 unsigned long events_processed;
206 unsigned long long data_read;
207 unsigned long long data_queued;
208 struct device_information *device;
215 struct tip_subbuf_fifo fifo;
216 struct tip_subbuf *leftover_ts;
219 * mmap controlled output files
221 unsigned long long fs_size;
222 unsigned long long fs_max_size;
223 unsigned long fs_off;
225 unsigned long fs_buf_len;
227 struct net_connection *nc;
230 struct device_information {
234 volatile int trace_started;
235 unsigned long drop_count;
236 struct thread_information *threads;
237 unsigned long buf_size;
238 unsigned long buf_nr;
239 unsigned int page_size;
243 time_t cl_connect_time;
247 static struct thread_information *thread_information;
249 static struct device_information *device_information;
251 /* command line option globals */
252 static char *debugfs_path;
253 static char *output_name;
254 static char *output_dir;
255 static int act_mask = ~0U;
256 static int kill_running_trace;
257 static unsigned long buf_size = BUF_SIZE;
258 static unsigned long buf_nr = BUF_NR;
259 static unsigned int page_size;
261 #define is_done() (*(volatile int *)(&done))
262 static volatile int done;
264 #define is_trace_stopped() (*(volatile int *)(&trace_stopped))
265 static volatile int trace_stopped;
267 #define is_stat_shown() (*(volatile int *)(&stat_shown))
268 static volatile int stat_shown;
270 int data_is_native = -1;
272 static void exit_trace(int status);
274 #define dip_tracing(dip) (*(volatile int *)(&(dip)->trace_started))
275 #define dip_set_tracing(dip, v) ((dip)->trace_started = (v))
277 #define __for_each_dip(__d, __di, __e, __i) \
278 for (__i = 0, __d = __di; __i < __e; __i++, __d++)
280 #define for_each_dip(__d, __i) \
281 __for_each_dip(__d, device_information, ndevs, __i)
282 #define for_each_nc_dip(__nc, __d, __i) \
283 __for_each_dip(__d, (__nc)->ch->device_information, (__nc)->ch->ndevs, __i)
285 #define __for_each_tip(__d, __t, __ncpus, __j) \
286 for (__j = 0, __t = (__d)->threads; __j < __ncpus; __j++, __t++)
287 #define for_each_tip(__d, __t, __j) \
288 __for_each_tip(__d, __t, ncpus, __j)
289 #define for_each_cl_host(__c) \
290 for (__c = cl_host_list; __c; __c = __c->list_next)
293 * networking stuff follows. we include a magic number so we know whether
294 * to endianness convert or not
296 struct blktrace_net_hdr {
297 u32 magic; /* same as trace magic */
298 char buts_name[32]; /* trace name */
299 u32 cpu; /* for which cpu */
301 u32 len; /* length of following trace data */
302 u32 cl_id; /* id for set of client per-cpu connections */
303 u32 buf_size; /* client buf_size for this trace */
304 u32 buf_nr; /* client buf_nr for this trace */
305 u32 page_size; /* client page_size for this trace */
308 #define TRACE_NET_PORT (8462)
317 * network cmd line params
319 static char hostname[MAXHOSTNAMELEN];
320 static int net_port = TRACE_NET_PORT;
321 static int net_mode = 0;
322 static int net_use_sendfile = 1;
325 struct cl_host *list_next;
326 struct in_addr cl_in_addr;
327 struct net_connection *net_connections;
329 struct device_information *device_information;
335 struct net_connection {
343 #define NET_MAX_CL_HOSTS (1024)
344 static struct cl_host *cl_host_list;
346 static int net_connects;
348 static int *net_out_fd;
/*
 * Signal handler (installed for SIGINT/SIGHUP/SIGTERM/SIGALRM in main):
 * stops the kernel-side trace on every device so reader threads can still
 * drain already-produced data before the program exits.
 * NOTE(review): source extraction is missing lines here; code left as-is.
 */
350 static void handle_sigint(__attribute__((__unused__)) int sig)
352 	struct device_information *dip;
356 	 * stop trace so we can reap currently produced data
358 	for_each_dip(dip, i) {
361 		if (ioctl(dip->fd, BLKTRACESTOP) < 0)
362 			perror("BLKTRACESTOP");
/*
 * Read the "dropped" event counter for a trace from
 * <debugfs>/block/<buts_name>/dropped.  Absence of the file is tolerated
 * (older kernels do not expose dropped counts).
 */
368 static int get_dropped_count(const char *buts_name)
371 	char tmp[MAXPATHLEN + 64];
373 	snprintf(tmp, sizeof(tmp), "%s/block/%s/dropped",
374 		debugfs_path, buts_name);
376 	fd = open(tmp, O_RDONLY);
379 	 * this may be ok, if the kernel doesn't support dropped counts
384 		fprintf(stderr, "Couldn't open dropped file %s\n", tmp);
/* tmp is reused as the read buffer for the counter text */
388 	if (read(fd, tmp, sizeof(tmp)) < 0) {
/*
 * Issue BLKTRACESETUP + BLKTRACESTART on the device fd with the configured
 * buffer size/count and action mask.  On success, records the kernel-chosen
 * trace name (buts.name) in the dip and marks the device as tracing.
 */
399 static int start_trace(struct device_information *dip)
401 	struct blk_user_trace_setup buts;
403 	memset(&buts, 0, sizeof(buts));
404 	buts.buf_size = dip->buf_size;
405 	buts.buf_nr = dip->buf_nr;
406 	buts.act_mask = act_mask;
408 	if (ioctl(dip->fd, BLKTRACESETUP, &buts) < 0) {
409 		perror("BLKTRACESETUP");
413 	if (ioctl(dip->fd, BLKTRACESTART) < 0) {
414 		perror("BLKTRACESTART");
/* buts.name was filled in by the kernel during BLKTRACESETUP */
418 	memcpy(dip->buts_name, buts.name, sizeof(dip->buts_name));
419 	dip_set_tracing(dip, 1);
/*
 * Stop and tear down tracing on one device.  Also runs when -k (kill a
 * running trace) was given even if we did not start the trace ourselves.
 */
423 static void stop_trace(struct device_information *dip)
425 	if (dip_tracing(dip) || kill_running_trace) {
426 		dip_set_tracing(dip, 0);
429 		 * should be stopped, just don't complain if it isn't
431 		ioctl(dip->fd, BLKTRACESTOP);
433 		if (ioctl(dip->fd, BLKTRACETEARDOWN) < 0)
434 			perror("BLKTRACETEARDOWN");
/*
 * Stop tracing on every device, first snapshotting each device's dropped
 * event count for the final statistics report.
 */
441 static void stop_all_traces(void)
443 	struct device_information *dip;
446 	for_each_dip(dip, i) {
447 		dip->drop_count = get_dropped_count(dip->buts_name);
/*
 * Block (via poll) until the per-cpu relay file has data readable, or the
 * timeout (milliseconds; -1 = forever) expires.
 */
452 static void wait_for_data(struct thread_information *tip, int timeout)
454 	struct pollfd pfd = { .fd = tip->fd, .events = POLLIN };
457 	if (poll(&pfd, 1, timeout) < 0) {
461 	if (pfd.revents & POLLIN)
463 	if (tip->ofile_stdout)
/*
 * read_data hook for local tracing: poll-then-read from the per-cpu debugfs
 * trace file, retrying on EAGAIN until data arrives or we are told to stop.
 */
468 static int read_data_file(struct thread_information *tip, void *buf,
474 		wait_for_data(tip, 100);
476 		ret = read(tip->fd, buf, len);
482 			if (errno != EAGAIN) {
484 				fprintf(stderr,"Thread %d failed read of %s\n",
490 	} while (!is_done());
/*
 * read_data hook for server mode: pull exactly 'len' bytes from the client
 * connection (MSG_WAITALL), looping until done or the run is aborted.
 * Returns the number of bytes actually received.
 */
496 static int read_data_net(struct thread_information *tip, void *buf,
499 	struct net_connection *nc = tip->nc;
500 	unsigned int bytes_left = len;
504 		ret = recv(nc->in_fd, buf, bytes_left, MSG_WAITALL);
509 			if (errno != EAGAIN) {
511 				fprintf(stderr, "server: failed read\n");
519 	} while (!is_done() && bytes_left);
521 	return len - bytes_left;
/*
 * Pop one sub-buffer from the single-producer/single-consumer ring.
 * head/tail live on separate cache lines (see struct tip_subbuf_fifo);
 * FIFO_SIZE is a power of two, so wraparound uses a mask.
 */
524 static inline struct tip_subbuf *
525 subbuf_fifo_dequeue(struct thread_information *tip)
527 	const int head = tip->fifo.head;
528 	const int next = (head + 1) & (FIFO_SIZE - 1);
530 	if (head != tip->fifo.tail) {
531 		struct tip_subbuf *ts = tip->fifo.q[head];
534 		tip->fifo.head = next;
/*
 * Push one sub-buffer onto the ring.  One slot is sacrificed to
 * distinguish full from empty; a full ring is reported as an error
 * ("fifo too small") rather than overwriting data.
 */
541 static inline int subbuf_fifo_queue(struct thread_information *tip,
542 				    struct tip_subbuf *ts)
544 	const int tail = tip->fifo.tail;
545 	const int next = (tail + 1) & (FIFO_SIZE - 1);
547 	if (next != tip->fifo.head) {
548 		tip->fifo.q[tail] = ts;
550 		tip->fifo.tail = next;
554 	fprintf(stderr, "fifo too small!\n");
559 * For file output, truncate and mmap the file appropriately
/*
 * get_subbuf hook for file output: grow the output file in chunks (ftruncate)
 * and mmap a window of it, then read trace data directly into the mapping.
 * fs_off keeps the mapping page-aligned relative to fs_size.
 */
561 static int mmap_subbuf(struct thread_information *tip, unsigned int maxlen)
563 	int ofd = fileno(tip->ofile);
568 	 * extend file, if we have to. use chunks of 16 subbuffers.
570 	if (tip->fs_off + maxlen > tip->fs_buf_len) {
/* drop the previous window before mapping a new one */
572 		munlock(tip->fs_buf, tip->fs_buf_len);
573 		munmap(tip->fs_buf, tip->fs_buf_len);
/* realign: offset of fs_size within its page */
577 		tip->fs_off = tip->fs_size & (tip->device->page_size - 1);
578 		nr = max(16, tip->device->buf_nr);
579 		tip->fs_buf_len = (nr * tip->device->buf_size) - tip->fs_off;
580 		tip->fs_max_size += tip->fs_buf_len;
582 		if (ftruncate(ofd, tip->fs_max_size) < 0) {
587 		tip->fs_buf = mmap(NULL, tip->fs_buf_len, PROT_WRITE,
588 				   MAP_SHARED, ofd, tip->fs_size - tip->fs_off);
589 		if (tip->fs_buf == MAP_FAILED) {
/* best effort; mlock failure is not checked here */
593 		mlock(tip->fs_buf, tip->fs_buf_len);
596 	ret = tip->read_data(tip, tip->fs_buf + tip->fs_off, maxlen);
598 		tip->data_read += ret;
608 * Use the copy approach for pipes and network
/*
 * get_subbuf hook for pipe/network output: copy approach.  Allocates a
 * tip_subbuf, reads up to maxlen bytes into it, and queues it on the fifo
 * for the writer to flush.
 * NOTE(review): malloc results are used unchecked in the visible lines.
 */
610 static int get_subbuf(struct thread_information *tip, unsigned int maxlen)
612 	struct tip_subbuf *ts = malloc(sizeof(*ts));
615 	ts->buf = malloc(tip->device->buf_size);
616 	ts->max_len = maxlen;
618 	ret = tip->read_data(tip, ts->buf, ts->max_len);
621 		tip->data_read += ret;
622 		if (subbuf_fifo_queue(tip, ts))
/*
 * Release per-thread resources (output buffer, and — in elided lines —
 * presumably the fd/ofile; TODO confirm against full source).
 */
634 static void close_thread(struct thread_information *tip)
640 	if (tip->ofile_buffer)
641 		free(tip->ofile_buffer);
647 	tip->ofile_buffer = NULL;
/*
 * For mmap-backed output files: unmap the window and truncate the file
 * down to the bytes actually written (fs_size), discarding the chunked
 * over-allocation done by mmap_subbuf().
 */
651 static void tip_ftrunc_final(struct thread_information *tip)
654 	 * truncate to right size and cleanup mmap
656 	if (tip->ofile_mmap && tip->ofile) {
657 		int ofd = fileno(tip->ofile);
660 			munmap(tip->fs_buf, tip->fs_buf_len);
/* NOTE(review): ftruncate return value is ignored here */
662 		ftruncate(ofd, tip->fs_size);
/*
 * Per-cpu reader thread: pin itself to its cpu, open the per-cpu relay
 * trace file under debugfs, then pull sub-buffers via the configured
 * get_subbuf hook until the run ends, draining remaining data with short
 * reads before final truncation.
 */
666 static void *thread_main(void *arg)
668 	struct thread_information *tip = arg;
669 	pid_t pid = getpid();
673 	CPU_SET((tip->cpu), &cpu_mask);
675 	if (sched_setaffinity(pid, sizeof(cpu_mask), &cpu_mask) == -1) {
676 		perror("sched_setaffinity");
680 	snprintf(tip->fn, sizeof(tip->fn), "%s/block/%s/trace%d",
681 			debugfs_path, tip->device->buts_name, tip->cpu);
682 	tip->fd = open(tip->fn, O_RDONLY);
685 		fprintf(stderr,"Thread %d failed open of %s\n", tip->cpu,
691 		if (tip->get_subbuf(tip, tip->device->buf_size) < 0)
696 	 * trace is stopped, pull data until we get a short read
698 	while (tip->get_subbuf(tip, tip->device->buf_size) > 0)
701 	tip_ftrunc_final(tip);
/*
 * Send exactly buf_len bytes over the socket, looping on partial sends
 * (remainder of loop body elided in this extraction).
 */
706 static int write_data_net(int fd, void *buf, unsigned int buf_len)
708 	unsigned int bytes_left = buf_len;
712 		ret = send(fd, buf, bytes_left, 0);
/*
 * Build and transmit a blktrace_net_hdr describing the following 'len'
 * bytes of trace data for this cpu's connection.
 * NOTE(review): strcpy into the fixed 32-byte hdr.buts_name is unbounded;
 * safe only because buts_name originates from a same-sized kernel buffer.
 */
725 static int net_send_header(struct thread_information *tip, unsigned int len)
727 	struct blktrace_net_hdr hdr;
729 	hdr.magic = BLK_IO_TRACE_MAGIC;
730 	strcpy(hdr.buts_name, tip->device->buts_name);
732 	hdr.max_cpus = ncpus;
734 	hdr.cl_id = getpid();
735 	hdr.buf_size = tip->device->buf_size;
736 	hdr.buf_nr = tip->device->buf_nr;
737 	hdr.page_size = tip->device->page_size;
739 	return write_data_net(net_out_fd[tip->cpu], &hdr, sizeof(hdr));
743 * send header with 0 length to signal end-of-run
/*
 * Client-side end-of-run: send one header per device with len == 0 so the
 * server knows the run is over.  The cpu field is overloaded to carry the
 * device's dropped-event count (the server reads it back as drop_count).
 */
745 static void net_client_send_close(void)
747 	struct device_information *dip;
748 	struct blktrace_net_hdr hdr;
751 	for_each_dip(dip, i) {
752 		hdr.magic = BLK_IO_TRACE_MAGIC;
753 		hdr.max_cpus = ncpus;
755 		strcpy(hdr.buts_name, dip->buts_name);
/* cpu field carries the dropped count for this close message */
756 		hdr.cpu = get_dropped_count(dip->buts_name);
757 		hdr.cl_id = getpid();
758 		hdr.buf_size = dip->buf_size;
759 		hdr.buf_nr = dip->buf_nr;
760 		hdr.page_size = dip->page_size;
762 		write_data_net(net_out_fd[0], &hdr, sizeof(hdr));
/*
 * flush_subbuf hook for the non-sendfile network client: header first,
 * then the copied sub-buffer payload.
 */
767 static int flush_subbuf_net(struct thread_information *tip,
768 			    struct tip_subbuf *ts)
770 	if (net_send_header(tip, ts->len))
772 	if (write_data_net(net_out_fd[tip->cpu], ts->buf, ts->len))
/*
 * Zero-copy transfer of ts->len bytes straight from the relay fd to the
 * per-cpu socket.  A short sendfile is reported as an error.
 */
780 static int net_sendfile(struct thread_information *tip, struct tip_subbuf *ts)
782 	int ret = sendfile(net_out_fd[tip->cpu], tip->fd, NULL, ts->len);
787 	} else if (ret < (int) ts->len) {
788 		fprintf(stderr, "short sendfile send (%d of %d)\n", ret, ts->len);
/*
 * flush_subbuf hook for the sendfile network client: header, then
 * zero-copy payload via net_sendfile(); accounts the bytes in data_read.
 */
795 static int flush_subbuf_sendfile(struct thread_information *tip,
796 				 struct tip_subbuf *ts)
800 	if (net_send_header(tip, ts->len))
802 	if (net_sendfile(tip, ts))
805 	tip->data_read += ts->len;
/*
 * get_subbuf hook for the sendfile client: wait for data, fstat the relay
 * file to learn how many new bytes are available (st_size minus what we
 * already queued), and flush that range without copying to userspace.
 */
812 static int get_subbuf_sendfile(struct thread_information *tip,
813 			       __attribute__((__unused__)) unsigned int maxlen)
815 	struct tip_subbuf *ts;
819 	wait_for_data(tip, -1);
821 	if (fstat(tip->fd, &sb) < 0) {
822 		perror("trace stat");
/* bytes produced since the last flush */
826 	ready = sb.st_size - tip->data_queued;
832 	ts = malloc(sizeof(*ts));
836 	tip->data_queued += ready;
838 	if (flush_subbuf_sendfile(tip, ts) < 0)
/*
 * Write buf_len bytes to the thread's stdio output stream as a single
 * fwrite item, clearing the error flag on failure so the stream stays
 * usable for diagnostics.
 */
844 static int write_data(struct thread_information *tip, void *buf,
845 		      unsigned int buf_len)
852 	ret = fwrite(buf, buf_len, 1, tip->ofile);
853 	if (ferror(tip->ofile) || ret != 1) {
855 		clearerr(tip->ofile);
859 	if (tip->ofile_stdout)
/*
 * flush_subbuf hook for piped file output.  Walks the sub-buffer event by
 * event (struct blk_io_trace + its pdu_len payload), writes out only whole
 * events, and carries any trailing partial event over to the next call via
 * tip->leftover_ts.
 */
865 static int flush_subbuf_file(struct thread_information *tip,
866 			     struct tip_subbuf *ts)
868 	unsigned int offset = 0;
869 	struct blk_io_trace *t;
870 	int pdu_len, events = 0;
873 	 * surplus from last run
875 	if (tip->leftover_ts) {
876 		struct tip_subbuf *prev_ts = tip->leftover_ts;
878 		if (prev_ts->len + ts->len > prev_ts->max_len) {
879 			prev_ts->max_len += ts->len;
/*
 * NOTE(review): realloc result overwrites the pointer directly —
 * leaks/crashes on allocation failure; needs a temp-pointer check.
 */
880 			prev_ts->buf = realloc(prev_ts->buf, prev_ts->max_len);
883 		memcpy(prev_ts->buf + prev_ts->len, ts->buf, ts->len);
884 		prev_ts->len += ts->len;
890 		tip->leftover_ts = NULL;
/* parse complete events; stop at a header or payload that is cut short */
893 	while (offset + sizeof(*t) <= ts->len) {
894 		t = ts->buf + offset;
896 		if (verify_trace(t)) {
897 			write_data(tip, ts->buf, offset);
901 		pdu_len = t->pdu_len;
903 		if (offset + sizeof(*t) + pdu_len > ts->len)
906 		offset += sizeof(*t) + pdu_len;
907 		tip->events_processed++;
908 		tip->data_read += sizeof(*t) + pdu_len;
912 	if (write_data(tip, ts->buf, offset))
916 	 * leftover bytes, save them for next time
918 	if (offset != ts->len) {
919 		tip->leftover_ts = ts;
/* move the partial tail to the front of the saved buffer */
921 		memmove(ts->buf, ts->buf + offset, ts->len);
/*
 * Dequeue one pending sub-buffer for this thread and hand it to the
 * configured flush hook; the elided lines handle an empty fifo.
 */
930 static int write_tip_events(struct thread_information *tip)
932 	struct tip_subbuf *ts = subbuf_fifo_dequeue(tip);
935 		return tip->flush_subbuf(tip, ts);
941 * scans the tips we know and writes out the subbuffers we accumulate
/*
 * Main-thread writer loop for piped/network output: repeatedly sweep every
 * device's threads, flushing any accumulated sub-buffers; after the run
 * ends, keep sweeping until no events remain and all threads have exited.
 */
943 static void get_and_write_events(void)
945 	struct device_information *dip;
946 	struct thread_information *tip;
947 	int i, j, events, ret, tips_running;
952 		for_each_dip(dip, i) {
953 			for_each_tip(dip, tip, j) {
954 				ret = write_tip_events(tip);
/* drain phase: flush stragglers while counting live threads */
970 		for_each_dip(dip, i) {
971 			for_each_tip(dip, tip, j) {
972 				ret = write_tip_events(tip);
975 				tips_running += !tip->exited;
979 	} while (events || tips_running);
/*
 * Wait for the trace run to finish.  Piped output ("-") and non-sendfile
 * network clients need the main thread to actively pump data
 * (get_and_write_events); plain file output just waits for the reader
 * threads to exit.  A network client then signals end-of-run to the server.
 */
982 static void wait_for_threads(void)
985 	 * for piped or network output, poll and fetch data for writeout.
986 	 * for files, we just wait around for trace threads to exit
988 	if ((output_name && !strcmp(output_name, "-")) ||
989 	    ((net_mode == Net_client) && !net_use_sendfile))
990 		get_and_write_events();
992 		struct device_information *dip;
993 		struct thread_information *tip;
994 		int i, j, tips_running;
1000 			for_each_dip(dip, i)
1001 				for_each_tip(dip, tip, j)
1002 					tips_running += !tip->exited;
1003 		} while (tips_running);
1006 	if (net_mode == Net_client)
1007 		net_client_send_close();
/*
 * Build the output file path for one per-cpu trace file:
 * [output_dir|.]/[client-addr-timestamp/ in server mode]
 * followed by "<output_name|buts_name>.blktrace.<cpu>".
 * In server mode the per-client directory is created on first use.
 * NOTE(review): sprintf into dst is unbounded; callers size dst generously.
 */
1010 static int fill_ofname(struct device_information *dip,
1011 		       struct thread_information *tip, char *dst,
1018 		len = sprintf(dst, "%s/", output_dir);
1020 		len = sprintf(dst, "./");
1022 	if (net_mode == Net_server) {
1023 		struct net_connection *nc = tip->nc;
1025 		len += sprintf(dst + len, "%s-", inet_ntoa(nc->ch->cl_in_addr));
1026 		len += strftime(dst + len, 64, "%F-%T/", gmtime(&dip->cl_connect_time));
1029 	if (stat(dst, &sb) < 0) {
1030 		if (errno != ENOENT) {
1034 		if (mkdir(dst, 0755) < 0) {
1036 			fprintf(stderr, "Can't make output dir\n");
1042 		sprintf(dst + len, "%s.blktrace.%d", output_name, tip->cpu);
1044 		sprintf(dst + len, "%s.blktrace.%d", buts_name, tip->cpu);
/*
 * Select the get_subbuf / flush_subbuf / read_data strategy for a thread
 * based on the run mode: sendfile client, copying client, mmap'ed file
 * output, or piped output; server mode reads from the network.
 */
1049 static void fill_ops(struct thread_information *tip)
1054 	if (net_mode == Net_client) {
1055 		if (net_use_sendfile) {
1056 			tip->get_subbuf = get_subbuf_sendfile;
/* sendfile path flushes inline from get_subbuf_sendfile */
1057 			tip->flush_subbuf = NULL;
1059 			tip->get_subbuf = get_subbuf;
1060 			tip->flush_subbuf = flush_subbuf_net;
1063 		if (tip->ofile_mmap)
1064 			tip->get_subbuf = mmap_subbuf;
1066 			tip->get_subbuf = get_subbuf;
1068 		tip->flush_subbuf = flush_subbuf_file;
1071 	if (net_mode == Net_server)
1072 		tip->read_data = read_data_net;
1074 		tip->read_data = read_data_file;
/*
 * Open/configure the thread's output destination: none for a network
 * client, stdout for "-o -", otherwise a regular file (mmap'ed, named via
 * fill_ofname).  Installs a setvbuf buffer sized OFILE_BUF for file output.
 */
1077 static int tip_open_output(struct device_information *dip,
1078 			   struct thread_information *tip)
1080 	int pipeline = output_name && !strcmp(output_name, "-");
1081 	int mode, vbuf_size;
1084 	if (net_mode == Net_client) {
1086 		tip->ofile_stdout = 0;
1087 		tip->ofile_mmap = 0;
1089 	} else if (pipeline) {
1090 		tip->ofile = fdopen(STDOUT_FILENO, "w");
1091 		tip->ofile_stdout = 1;
1092 		tip->ofile_mmap = 0;
1096 		if (fill_ofname(dip, tip, op, dip->buts_name))
1098 		tip->ofile = fopen(op, "w+");
1099 		tip->ofile_stdout = 0;
1100 		tip->ofile_mmap = 1;
1102 		vbuf_size = OFILE_BUF;
1105 	if (tip->ofile == NULL) {
1110 	tip->ofile_buffer = malloc(vbuf_size);
1111 	if (setvbuf(tip->ofile, tip->ofile_buffer, mode, vbuf_size)) {
/*
 * Initialize per-cpu thread state for one device (counters, fifo, leftover
 * buffer), open each thread's output, and spawn the reader threads.
 */
1122 static int start_threads(struct device_information *dip)
1124 	struct thread_information *tip;
1127 	for_each_tip(dip, tip, j) {
1130 		tip->events_processed = 0;
1132 		memset(&tip->fifo, 0, sizeof(tip->fifo));
1133 		tip->leftover_ts = NULL;
1135 		if (tip_open_output(dip, tip))
1138 		if (pthread_create(&tip->thread, NULL, thread_main, tip)) {
1139 			perror("pthread_create");
/*
 * Join all reader threads of one device (stop_threads), and the
 * per-device iteration over every device (stop_all_threads).
 */
1148 static void stop_threads(struct device_information *dip)
1150 	struct thread_information *tip;
1154 	for_each_tip(dip, tip, i) {
1155 		(void) pthread_join(tip->thread, (void *) &ret);
1160 static void stop_all_threads(void)
1162 	struct device_information *dip;
1165 	for_each_dip(dip, i)
/*
 * Teardown helpers: stop_all_tracing stops the kernel trace on every
 * device; exit_trace is the common error-exit path (body elided) that
 * first ensures tracing is stopped.
 */
1169 static void stop_all_tracing(void)
1171 	struct device_information *dip;
1174 	for_each_dip(dip, i)
1178 static void exit_trace(int status)
1180 	if (!is_trace_stopped()) {
/*
 * Append one device path to the global device_information array, growing
 * it by one entry with realloc.
 * NOTE(review): realloc overwrites the global directly — on failure the
 * old array is leaked (process exits on OOM here, so impact is limited).
 */
1189 static int resize_devices(char *path)
1191 	int size = (ndevs + 1) * sizeof(struct device_information);
1193 	device_information = realloc(device_information, size);
1194 	if (!device_information) {
1195 		fprintf(stderr, "Out of memory, device %s (%d)\n", path, size);
1198 	device_information[ndevs].path = path;
/*
 * Open every configured block device (read-only, non-blocking) and seed
 * each dip with the globally configured buffer geometry.
 */
1203 static int open_devices(void)
1205 	struct device_information *dip;
1208 	for_each_dip(dip, i) {
1209 		dip->fd = open(dip->path, O_RDONLY | O_NONBLOCK);
1214 		dip->buf_size = buf_size;
1215 		dip->buf_nr = buf_nr;
1216 		dip->page_size = page_size;
/*
 * Allocate the ncpus*ndevs thread_information matrix, start the kernel
 * trace on every device, then start the reader threads.  On failure at
 * either stage, unwinds the devices already started (elided cleanup uses
 * __for_each_dip over the first i entries).
 */
1222 static int start_devices(void)
1224 	struct device_information *dip;
1227 	size = ncpus * sizeof(struct thread_information);
1228 	thread_information = malloc(size * ndevs);
1229 	if (!thread_information) {
1230 		fprintf(stderr, "Out of memory, threads (%d)\n", size * ndevs);
1233 	memset(thread_information, 0, size * ndevs);
1235 	for_each_dip(dip, i) {
1236 		if (start_trace(dip)) {
1238 			fprintf(stderr, "Failed to start trace on %s\n",
/* unwind the traces already started */
1245 			__for_each_dip(dip, device_information, i, j)
1251 	for_each_dip(dip, i) {
1252 		dip->threads = thread_information + (i * ncpus);
1253 		if (start_threads(dip)) {
1254 			fprintf(stderr, "Failed to start worker threads\n");
1260 			__for_each_dip(dip, device_information, i, j)
1262 			for_each_dip(dip, i)
/*
 * Print per-device, per-cpu event/byte statistics and dropped-event
 * totals.  Suppressed (no_stdout) when trace data itself went to stdout;
 * guarded by is_stat_shown() so it runs at most once.
 */
1271 static void show_stats(struct device_information *dips, int ndips, int cpus)
1273 	struct device_information *dip;
1274 	struct thread_information *tip;
1275 	unsigned long long events_processed, data_read;
1276 	unsigned long total_drops;
1277 	int i, j, no_stdout = 0;
1279 	if (is_stat_shown())
1282 	if (output_name && !strcmp(output_name, "-"))
1288 	__for_each_dip(dip, dips, ndips, i) {
1290 			printf("Device: %s\n", dip->path);
1291 		events_processed = 0;
1293 		__for_each_tip(dip, tip, cpus, j) {
1295 				printf("  CPU%3d: %20lu events, %8llu KiB data\n",
1296 					tip->cpu, tip->events_processed,
1297 					(tip->data_read + 1023) >> 10);
1298 			events_processed += tip->events_processed;
1299 			data_read += tip->data_read;
1301 		total_drops += dip->drop_count;
1303 			printf("  Total:  %20llu events (dropped %lu), %8llu KiB data\n",
1304 					events_processed, dip->drop_count,
1305 					(data_read + 1023) >> 10);
1309 		fprintf(stderr, "You have dropped events, consider using a larger buffer size (-b)\n");
/*
 * Server-side lookup (or lazy creation) of the device_information matching
 * a received net header, keyed by buts_name and client id within the
 * client host's device array.  On creation, copies the client's buffer
 * geometry from the header, allocates the per-cpu thread array, and opens
 * each thread's output file.
 */
1312 static struct device_information *net_get_dip(struct net_connection *nc,
1313 					      struct blktrace_net_hdr *bnh)
1315 	struct device_information *dip, *cl_dip = NULL;
1316 	struct cl_host *ch = nc->ch;
1319 	for (i = 0; i < ch->ndevs; i++) {
1320 		dip = &ch->device_information[i];
1322 		if (!strcmp(dip->buts_name, bnh->buts_name))
1325 		if (dip->cl_id == bnh->cl_id)
/* not found: grow the array and initialize a fresh entry */
1329 	ch->device_information = realloc(ch->device_information, (ch->ndevs + 1) * sizeof(*dip));
1330 	dip = &ch->device_information[ch->ndevs];
1331 	memset(dip, 0, sizeof(*dip));
1334 	dip->cl_id = bnh->cl_id;
1335 	dip->buf_size = bnh->buf_size;
1336 	dip->buf_nr = bnh->buf_nr;
1337 	dip->page_size = bnh->page_size;
/* share the connect timestamp with a sibling device from the same client */
1340 		dip->cl_connect_time = cl_dip->cl_connect_time;
1342 		dip->cl_connect_time = nc->connect_time;
1343 	strcpy(dip->buts_name, bnh->buts_name);
1344 	dip->path = strdup(bnh->buts_name);
1345 	dip->trace_started = 1;
1347 	dip->threads = malloc(nc->ncpus * sizeof(struct thread_information));
1348 	memset(dip->threads, 0, nc->ncpus * sizeof(struct thread_information));
1353 	for (i = 0; i < nc->ncpus; i++) {
1354 		struct thread_information *tip = &dip->threads[i];
1361 		if (tip_open_output(dip, tip))
/*
 * Resolve the thread_information for an incoming header: find/create the
 * device, reject data for devices already closed, then index the per-cpu
 * thread array by the header's cpu field.
 */
1370 static struct thread_information *net_get_tip(struct net_connection *nc,
1371 					      struct blktrace_net_hdr *bnh)
1373 	struct device_information *dip;
1374 	struct thread_information *tip;
1376 	dip = net_get_dip(nc, bnh);
1377 	if (!dip->trace_started) {
1378 		fprintf(stderr, "Events for closed devices %s\n", dip->buts_name);
1382 	tip = &dip->threads[bnh->cpu];
/*
 * Receive one blktrace_net_hdr from the connection.  Temporarily switches
 * the socket to O_NONBLOCK so the loop can honor is_done() instead of
 * blocking indefinitely, restoring the original flags afterwards.
 */
1389 static int net_get_header(struct net_connection *nc,
1390 			  struct blktrace_net_hdr *bnh)
1392 	int fl = fcntl(nc->in_fd, F_GETFL);
1393 	int bytes_left, ret;
1396 	fcntl(nc->in_fd, F_SETFL, fl | O_NONBLOCK);
1397 	bytes_left = sizeof(*bnh);
1398 	while (bytes_left && !is_done()) {
1399 		ret = recv(nc->in_fd, p, bytes_left, MSG_WAITALL);
1401 			if (errno != EAGAIN) {
1402 				perror("recv header");
1415 	fcntl(nc->in_fd, F_SETFL, fl & ~O_NONBLOCK);
1420 * finalize a net client: truncate files, show stats, cleanup, etc
/*
 * Finalize one client device after its end-of-run message: truncate the
 * mmap'ed output files, print its stats, and reset per-thread state for a
 * possible next run.
 */
1422 static void device_done(struct net_connection *nc, struct device_information *dip)
1424 	struct thread_information *tip;
1427 	__for_each_tip(dip, tip, nc->ncpus, i)
1428 		tip_ftrunc_final(tip);
1430 	show_stats(dip, 1, nc->ncpus);
1433 	 * cleanup for next run
1435 	__for_each_tip(dip, tip, nc->ncpus, i) {
/* Compare two IPv4 addresses for equality. */
1449 static inline int in_addr_eq(struct in_addr a, struct in_addr b)
1451 	return a.s_addr == b.s_addr;
/* Push a client host onto the head of the singly-linked cl_host_list. */
1454 static void net_add_client_host(struct cl_host *ch)
1456 	ch->list_next = cl_host_list;
/*
 * Unlink a client host from cl_host_list (head and interior cases; the
 * host itself is freed elsewhere — elided lines).
 */
1461 static void net_remove_client_host(struct cl_host *ch)
1463 	struct cl_host *p, *c;
1465 	for (p = c = cl_host_list; c; c = c->list_next) {
1468 				cl_host_list = c->list_next;
1470 				p->list_next = c->list_next;
/* Linear search of cl_host_list for a host with the given IPv4 address. */
1478 static struct cl_host *net_find_client_host(struct in_addr cl_in_addr)
1480 	struct cl_host *ch = cl_host_list;
1483 		if (in_addr_eq(ch->cl_in_addr, cl_in_addr))
/*
 * All devices of a client host are done: free its per-host arrays, adjust
 * the global connection count, and remove it from the host list.
 */
1491 static void net_client_host_done(struct cl_host *ch)
1493 	free(ch->device_information);
1494 	free(ch->net_connections);
1495 	net_connects -= ch->nconn;
1496 	net_remove_client_host(ch);
1501 * handle incoming events from a net client
/*
 * Handle one incoming message from a client connection: read the header,
 * establish/verify endianness on first contact, byte-swap if the client is
 * foreign-endian, validate the magic, then either process an end-of-run
 * (len == 0, cpu field overloaded with the drop count) or read the payload
 * into the device's mmap'ed output via mmap_subbuf.
 */
1503 static int net_client_data(struct net_connection *nc)
1505 	struct thread_information *tip;
1506 	struct blktrace_net_hdr bnh;
1508 	if (net_get_header(nc, &bnh))
1511 	if (data_is_native == -1 && check_data_endianness(bnh.magic)) {
1512 		fprintf(stderr, "server: received data is bad\n");
1516 	if (!data_is_native) {
1517 		bnh.magic = be32_to_cpu(bnh.magic);
1518 		bnh.cpu = be32_to_cpu(bnh.cpu);
1519 		bnh.max_cpus = be32_to_cpu(bnh.max_cpus);
1520 		bnh.len = be32_to_cpu(bnh.len);
1521 		bnh.cl_id = be32_to_cpu(bnh.cl_id);
1522 		bnh.buf_size = be32_to_cpu(bnh.buf_size);
1523 		bnh.buf_nr = be32_to_cpu(bnh.buf_nr);
1524 		bnh.page_size = be32_to_cpu(bnh.page_size);
/* low byte of the magic carries the protocol version */
1527 	if ((bnh.magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) {
1528 		fprintf(stderr, "server: bad data magic\n");
1532 	if (nc->ncpus == -1)
1533 		nc->ncpus = bnh.max_cpus;
1536 	 * len == 0 means that the other end signalled end-of-run
1540 		 * overload cpu count with dropped events
1542 		struct device_information *dip;
1544 		dip = net_get_dip(nc, &bnh);
1545 		dip->drop_count = bnh.cpu;
1546 		dip->trace_started = 0;
1548 		printf("server: end of run for %s\n", dip->buts_name);
1550 		device_done(nc, dip);
1552 		if (++nc->ch->ndevs_done == nc->ch->ndevs)
1553 			net_client_host_done(nc->ch);
1558 	tip = net_get_tip(nc, &bnh);
1562 	if (mmap_subbuf(tip, bnh.len))
/*
 * Accept a new client connection.  Connections from the same source
 * address are grouped under one cl_host (created on first contact, capped
 * at NET_MAX_CL_HOSTS); each connection gets a net_connection entry with
 * its accept timestamp.
 */
1568 static void net_add_connection(int listen_fd, struct sockaddr_in *addr)
1570 	socklen_t socklen = sizeof(*addr);
1571 	struct net_connection *nc;
1575 	in_fd = accept(listen_fd, (struct sockaddr *) addr, &socklen);
1581 	ch = net_find_client_host(addr->sin_addr);
1583 		if (cl_hosts == NET_MAX_CL_HOSTS) {
1584 			fprintf(stderr, "server: no more clients allowed\n");
1587 		ch = malloc(sizeof(struct cl_host));
1588 		memset(ch, 0, sizeof(*ch));
1589 		ch->cl_in_addr = addr->sin_addr;
1590 		net_add_client_host(ch);
1592 		printf("server: connection from %s\n", inet_ntoa(addr->sin_addr));
/* NOTE(review): realloc overwrites ch->net_connections without a temp check */
1595 	ch->net_connections = realloc(ch->net_connections, (ch->nconn + 1) * sizeof(*nc));
1596 	nc = &ch->net_connections[ch->nconn++];
1597 	memset(nc, 0, sizeof(*nc));
1599 	time(&nc->connect_time);
1607 * event driven loop, handle new incoming connections and data from
1608 * existing connections
/*
 * Server event loop: one poll set where entry 0 is the listening socket
 * (new connections) and the remaining entries mirror every live client
 * connection across all hosts.  The pfds/ncs arrays are regrown whenever
 * the connection count reaches the current capacity.
 */
1610 static void net_server_handle_connections(int listen_fd,
1611 					  struct sockaddr_in *addr)
1613 	struct pollfd *pfds = NULL;
1614 	struct net_connection **ncs = NULL;
1615 	int max_connects = 0;
1616 	int i, nconns, events;
1618 	struct net_connection *nc;
1620 	printf("server: waiting for connections...\n");
1622 	while (!is_done()) {
1623 		if (net_connects >= max_connects) {
1624 			pfds = realloc(pfds, (net_connects + 1) * sizeof(*pfds));
1625 			ncs = realloc(ncs, (net_connects + 1) * sizeof(*ncs));
1626 			max_connects = net_connects + 1;
1629 		 * the zero entry is for incoming connections, remaining
1630 		 * entries for clients
1632 		pfds[0].fd = listen_fd;
1633 		pfds[0].events = POLLIN;
/* rebuild the poll set from the current host/connection lists */
1635 		for_each_cl_host(ch) {
1636 			for (i = 0; i < ch->nconn; i++) {
1637 				nc = &ch->net_connections[i];
1638 				pfds[nconns + 1].fd = nc->in_fd;
1639 				pfds[nconns + 1].events = POLLIN;
1644 		events = poll(pfds, 1 + nconns, -1);
1654 		if (pfds[0].revents & POLLIN) {
1655 			net_add_connection(listen_fd, addr);
1659 		for (i = 0; events && i < nconns; i++) {
1660 			if (pfds[i + 1].revents & POLLIN) {
1661 				net_client_data(ncs[i]);
1669 * Start here when we are in server mode - just fetch data from the network
/*
 * Server-mode entry point: create/bind/listen a TCP socket on net_port
 * (SO_REUSEADDR, INADDR_ANY) and hand it to the connection event loop.
 */
1672 static int net_server(void)
1674 	struct sockaddr_in addr;
1677 	fd = socket(AF_INET, SOCK_STREAM, 0);
1679 		perror("server: socket");
1684 	if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) < 0) {
1685 		perror("setsockopt");
1689 	memset(&addr, 0, sizeof(addr));
1690 	addr.sin_family = AF_INET;
1691 	addr.sin_addr.s_addr = htonl(INADDR_ANY);
1692 	addr.sin_port = htons(net_port);
1694 	if (bind(fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
1699 	if (listen(fd, 1) < 0) {
1704 	net_server_handle_connections(fd, &addr);
1709 * Setup outgoing network connection where we will transmit data
/*
 * Open one outgoing TCP connection for cpu i (net_setup_client_cpu), and
 * the client-mode setup that resolves the hostname (dotted quad via
 * inet_aton, else gethostbyname) and connects once per cpu into the
 * net_out_fd array.
 */
1711 static int net_setup_client_cpu(int i, struct sockaddr_in *addr)
1715 	fd = socket(AF_INET, SOCK_STREAM, 0);
1717 		perror("client: socket");
1721 	if (connect(fd, (struct sockaddr *) addr, sizeof(*addr)) < 0) {
1722 		perror("client: connect");
1730 static int net_setup_client(void)
1732 	struct sockaddr_in addr;
1735 	memset(&addr, 0, sizeof(addr));
1736 	addr.sin_family = AF_INET;
1737 	addr.sin_port = htons(net_port);
1739 	if (inet_aton(hostname, &addr.sin_addr) != 1) {
1740 		struct hostent *hent = gethostbyname(hostname);
1742 			perror("gethostbyname");
/* IPv4 only: copy the 4-byte address, keep the canonical host name */
1746 		memcpy(&addr.sin_addr, hent->h_addr, 4);
1747 		strcpy(hostname, hent->h_name);
1750 	printf("blktrace: connecting to %s\n", hostname);
1752 	net_out_fd = malloc(ncpus * sizeof(*net_out_fd));
1753 	for (i = 0; i < ncpus; i++) {
1754 		if (net_setup_client_cpu(i, &addr))
1758 	printf("blktrace: connected!\n");
1763 static char usage_str[] = \
1764 "-d <dev> [ -r debugfs path ] [ -o <output> ] [-k ] [ -w time ]\n" \
1765 "[ -a action ] [ -A action mask ] [ -I <devs file> ] [ -v ]\n\n" \
1766 "\t-d Use specified device. May also be given last after options\n" \
1767 "\t-r Path to mounted debugfs, defaults to /sys/kernel/debug\n" \
1768 "\t-o File(s) to send output to\n" \
1769 "\t-D Directory to prepend to output file names\n" \
1770 "\t-k Kill a running trace\n" \
1771 "\t-w Stop after defined time, in seconds\n" \
1772 "\t-a Only trace specified actions. See documentation\n" \
1773 "\t-A Give trace mask as a single value. See documentation\n" \
1774 "\t-b Sub buffer size in KiB\n" \
1775 "\t-n Number of sub buffers\n" \
1776 "\t-l Run in network listen mode (blktrace server)\n" \
1777 "\t-h Run in network client mode, connecting to the given host\n" \
1778 "\t-p Network port to use (default 8462)\n" \
1779 "\t-s Make the network client NOT use sendfile() to transfer data\n" \
1780 "\t-I Add devices found in <devs file>\n" \
1781 "\t-V Print program version info\n\n";
/* Print usage, version and option summary to stderr. */
1783 static void show_usage(char *program)
1785 	fprintf(stderr, "Usage: %s %s %s",program, blktrace_version, usage_str);
/*
 * Entry point: parse options, validate the debugfs mount, open devices,
 * optionally kill a running trace (-k), then start tracing (server mode
 * branches off into net_server() before devices are opened).
 */
1788 int main(int argc, char *argv[])
1790 	static char default_debugfs_path[] = "/sys/kernel/debug";
1794 	int act_mask_tmp = 0;
1796 	while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) >= 0) {
1799 			i = find_mask_map(optarg);
1801 				fprintf(stderr,"Invalid action mask %s\n",
1809 			if ((sscanf(optarg, "%x", &i) != 1) ||
1810 			    !valid_act_opt(i)) {
1812 					"Invalid set action mask %s/0x%x\n",
1820 			if (resize_devices(optarg) != 0)
1826 			FILE *ifp = fopen(optarg, "r");
1830 					"Invalid file for devices %s\n",
/* one device path per line in the -I devices file */
1835 			while (fscanf(ifp, "%s\n", dev_line) == 1)
1836 				if (resize_devices(strdup(dev_line)) != 0)
1843 			debugfs_path = optarg;
1847 			output_name = optarg;
1850 			kill_running_trace = 1;
1853 			stop_watch = atoi(optarg);
1854 			if (stop_watch <= 0) {
1856 					"Invalid stopwatch value (%d secs)\n",
1863 			printf("%s version %s\n", argv[0], blktrace_version);
/* -b is given in KiB; capped at 16 MiB */
1866 			buf_size = strtoul(optarg, NULL, 10);
1867 			if (buf_size <= 0 || buf_size > 16*1024) {
1869 					"Invalid buffer size (%lu)\n",buf_size);
1875 			buf_nr = strtoul(optarg, NULL, 10);
1878 					"Invalid buffer nr (%lu)\n", buf_nr);
1883 			output_dir = optarg;
1886 			net_mode = Net_client;
1887 			strcpy(hostname, optarg);
1890 			net_mode = Net_server;
1893 			net_port = atoi(optarg);
1896 			net_use_sendfile = 0;
1899 			show_usage(argv[0]);
1904 	setlocale(LC_NUMERIC, "en_US");
1906 	page_size = getpagesize();
1908 	if (net_mode == Net_server) {
1910 			fprintf(stderr, "-o ignored in server mode\n");
1914 		return net_server();
/* remaining non-option args are additional device paths */
1917 	while (optind < argc) {
1918 		if (resize_devices(argv[optind++]) != 0)
1923 		show_usage(argv[0]);
1927 	if (act_mask_tmp != 0)
1928 		act_mask = act_mask_tmp;
1931 		debugfs_path = default_debugfs_path;
1933 	if (statfs(debugfs_path, &st) < 0) {
1935 		fprintf(stderr,"%s does not appear to be a valid path\n",
1938 	} else if (st.f_type != (long) DEBUGFS_TYPE) {
1939 		fprintf(stderr,"%s does not appear to be a debug filesystem\n",
1944 	if (open_devices() != 0)
1947 	if (kill_running_trace) {
1952 	ncpus = sysconf(_SC_NPROCESSORS_ONLN);
1954 		fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed\n");
1958 	signal(SIGINT, handle_sigint);
1959 	signal(SIGHUP, handle_sigint);
1960 	signal(SIGTERM, handle_sigint);
1961 	signal(SIGALRM, handle_sigint);
/* ignore SIGPIPE so dead sockets/pipes surface as write errors instead */
1962 	signal(SIGPIPE, SIG_IGN);
1964 	if (net_mode == Net_client && net_setup_client())
1967 	if (start_devices() != 0)
1970 	atexit(stop_all_tracing);
1977 	if (!is_trace_stopped()) {
1983 	show_stats(device_information, ndevs, ncpus);