2 * block queue tracing application
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 #include <sys/types.h>
29 #include <sys/ioctl.h>
30 #include <sys/param.h>
31 #include <sys/statfs.h>
34 #include <sys/socket.h>
41 #include <netinet/in.h>
42 #include <arpa/inet.h>
44 #include <sys/sendfile.h>
/*
 * Program version string and default buffer sizing.
 * NOTE(review): original line numbers in this listing are non-contiguous,
 * so intermediate source lines are missing throughout the file.
 */
49 static char blktrace_version[] = "0.99.1";
/* default relay sub-buffer size, in bytes */
52 * You may want to increase this even more, if you are logging at a high
53 * rate and see skipped/missed events
55 #define BUF_SIZE (512 * 1024)
/* stdio output buffer size for per-cpu trace files */
58 #define OFILE_BUF (128 * 1024)
/* relayfs magic, checked against statfs() f_type in main() */
60 #define RELAYFS_TYPE 0xF0B4A981
/* short-option string; pairs with the l_opts long-option table below */
62 #define S_OPTS "d:a:A:r:o:kw:Vb:n:D:lh:p:s"
/*
 * Long-option table matching S_OPTS.
 * NOTE(review): most .name/.val fields are absent from this truncated
 * listing — only the has_arg lines and a few names survive.
 */
63 static struct option l_opts[] = {
66 .has_arg = required_argument,
72 .has_arg = required_argument,
78 .has_arg = required_argument,
84 .has_arg = required_argument,
90 .has_arg = required_argument,
96 .has_arg = no_argument,
102 .has_arg = required_argument,
108 .has_arg = no_argument,
113 .name = "buffer-size",
114 .has_arg = required_argument,
119 .name = "num-sub-buffers",
120 .has_arg = required_argument,
125 .name = "output-dir",
126 .has_arg = required_argument,
132 .has_arg = no_argument,
138 .has_arg = required_argument,
144 .has_arg = required_argument,
150 .has_arg = no_argument,
/* tail of struct tip_subbuf: max_len is capacity (len is the used count) */
162 unsigned int max_len;
/* single-producer/single-consumer ring; head/tail on separate cache lines */
165 #define FIFO_SIZE (1024) /* should be plenty big! */
166 #define CL_SIZE (128) /* cache line, any bigger? */
168 struct tip_subbuf_fifo {
169 int tail __attribute__((aligned(CL_SIZE)));
170 int head __attribute__((aligned(CL_SIZE)));
171 struct tip_subbuf *q[FIFO_SIZE];
/* per-cpu reader-thread state (one per device per cpu) */
174 struct thread_information {
180 char fn[MAXPATHLEN + 64];
/* strategy hooks, selected in fill_ops() by output/network mode */
188 int (*get_subbuf)(struct thread_information *, unsigned int);
189 int (*flush_subbuf)(struct thread_information *, struct tip_subbuf *);
190 int (*read_data)(struct thread_information *, void *, unsigned int);
192 unsigned long events_processed;
193 unsigned long long data_read;
194 unsigned long long data_queued;
195 struct device_information *device;
202 struct tip_subbuf_fifo fifo;
203 struct tip_subbuf *leftover_ts;
206 * mmap controlled output files
208 unsigned long long fs_size;
209 unsigned long long fs_max_size;
210 unsigned long fs_off;
212 unsigned long fs_buf_len;
/* per-traced-device state; threads points at its ncpus reader threads */
215 struct device_information {
219 volatile int trace_started;
220 unsigned long drop_count;
221 struct thread_information *threads;
222 struct net_connection *nc;
226 static struct thread_information *thread_information;
228 static struct device_information *device_information;
230 /* command line option globals */
231 static char *relay_path;
232 static char *output_name;
233 static char *output_dir;
234 static int act_mask = ~0U;
235 static int kill_running_trace;
236 static unsigned long buf_size = BUF_SIZE;
237 static unsigned long buf_nr = BUF_NR;
238 static unsigned int page_size;
/* volatile-read wrappers for flags set from signal handlers */
240 #define is_done() (*(volatile int *)(&done))
241 static volatile int done;
243 #define is_trace_stopped() (*(volatile int *)(&trace_stopped))
244 static volatile int trace_stopped;
246 #define is_stat_shown() (*(volatile int *)(&stat_shown))
247 static volatile int stat_shown;
249 int data_is_native = -1;
251 static void exit_trace(int status);
253 #define dip_tracing(dip) (*(volatile int *)(&(dip)->trace_started))
254 #define dip_set_tracing(dip, v) ((dip)->trace_started = (v))
/* iteration helpers over devices and their per-cpu threads */
256 #define __for_each_dip(__d, __di, __e, __i) \
257 for (__i = 0, __d = __di; __i < __e; __i++, __d++)
259 #define for_each_dip(__d, __i) \
260 __for_each_dip(__d, device_information, ndevs, __i)
261 #define for_each_nc_dip(__nc, __d, __i) \
262 __for_each_dip(__d, (__nc)->device_information, (__nc)->ndevs, __i)
264 #define __for_each_tip(__d, __t, __ncpus, __j) \
265 for (__j = 0, __t = (__d)->threads; __j < __ncpus; __j++, __t++)
266 #define for_each_tip(__d, __t, __j) \
267 __for_each_tip(__d, __t, ncpus, __j)
270 * networking stuff follows. we include a magic number so we know whether
271 * to endianness convert or not
/* on-wire header preceding each sub-buffer sent over the network */
273 struct blktrace_net_hdr {
274 u32 magic; /* same as trace magic */
275 char buts_name[32]; /* trace name */
276 u32 cpu; /* for which cpu */
278 u32 len; /* length of following trace data */
281 #define TRACE_NET_PORT (8462)
290 * network cmd line params
292 static char hostname[MAXHOSTNAMELEN];
293 static int net_port = TRACE_NET_PORT;
294 static int net_mode = 0;
295 static int net_use_sendfile;
/* one entry per accepted client connection (server mode) */
297 struct net_connection {
300 struct in_addr cl_in_addr;
301 struct device_information *device_information;
304 int connection_index;
307 #define NET_MAX_CONNECTIONS (1024)
308 static struct net_connection net_connections[NET_MAX_CONNECTIONS];
309 static int net_connects;
310 static int net_out_fd = -1;
/* signal handler: stop kernel-side tracing on every device so data already
 * produced can still be reaped by the reader threads */
312 static void handle_sigint(__attribute__((__unused__)) int sig)
314 struct device_information *dip;
318 * stop trace so we can reap currently produced data
320 for_each_dip(dip, i) {
323 if (ioctl(dip->fd, BLKTRACESTOP) < 0)
324 perror("BLKTRACESTOP");
/* read <relay_path>/block/<buts_name>/dropped to learn how many events the
 * kernel dropped for this trace; a missing file is tolerated (older kernels) */
330 static int get_dropped_count(const char *buts_name)
333 char tmp[MAXPATHLEN + 64];
335 snprintf(tmp, sizeof(tmp), "%s/block/%s/dropped",
336 relay_path, buts_name);
338 fd = open(tmp, O_RDONLY);
341 * this may be ok, if the kernel doesn't support dropped counts
346 fprintf(stderr, "Couldn't open dropped file %s\n", tmp);
/* tmp is reused as the read buffer for the count text */
350 if (read(fd, tmp, sizeof(tmp)) < 0) {
/* issue BLKTRACESETUP + BLKTRACESTART on one device; on success record the
 * kernel-assigned trace name and mark the device as tracing */
361 static int start_trace(struct device_information *dip)
363 struct blk_user_trace_setup buts;
365 memset(&buts, 0, sizeof(buts));
366 buts.buf_size = buf_size;
367 buts.buf_nr = buf_nr;
368 buts.act_mask = act_mask;
370 if (ioctl(dip->fd, BLKTRACESETUP, &buts) < 0) {
371 perror("BLKTRACESETUP");
375 if (ioctl(dip->fd, BLKTRACESTART) < 0) {
376 perror("BLKTRACESTART");
/* kernel fills in buts.name; remember it for relay file paths */
380 memcpy(dip->buts_name, buts.name, sizeof(dip->buts_name));
381 dip_set_tracing(dip, 1);
/* stop and tear down kernel tracing on one device; STOP is issued without
 * checking since the trace may already be stopped */
385 static void stop_trace(struct device_information *dip)
387 if (dip_tracing(dip) || kill_running_trace) {
388 dip_set_tracing(dip, 0);
391 * should be stopped, just don't complain if it isn't
393 ioctl(dip->fd, BLKTRACESTOP);
395 if (ioctl(dip->fd, BLKTRACETEARDOWN) < 0)
396 perror("BLKTRACETEARDOWN");
/* stop every traced device and snapshot its dropped-event count */
403 static void stop_all_traces(void)
405 struct device_information *dip;
408 for_each_dip(dip, i) {
409 dip->drop_count = get_dropped_count(dip->buts_name);
/* poll the per-cpu trace fd (100ms timeout) until the requested events
 * arrive or the global done flag is set */
414 static void wait_for_data(struct thread_information *tip, int events)
416 struct pollfd pfd = { .fd = tip->fd, .events = events };
419 if (poll(&pfd, 1, 100) < 0) {
423 if (pfd.revents & events)
425 if (tip->ofile_stdout)
427 } while (!is_done());
/* read from the relay trace file; retries on EAGAIN until done */
430 static int read_data_file(struct thread_information *tip, void *buf,
436 wait_for_data(tip, POLLIN);
438 ret = read(tip->fd, buf, len);
444 if (errno != EAGAIN) {
446 fprintf(stderr,"Thread %d failed read of %s\n",
452 } while (!is_done());
/* server-mode source: recv() from the client socket; returns bytes received */
458 static int read_data_net(struct thread_information *tip, void *buf,
461 struct net_connection *nc = tip->device->nc;
462 unsigned int bytes_left = len;
466 ret = recv(nc->in_fd, buf, bytes_left, MSG_WAITALL);
471 if (errno != EAGAIN) {
473 fprintf(stderr, "server: failed read\n");
481 } while (!is_done() && bytes_left);
483 return len - bytes_left;
/* pop one sub-buffer from the per-thread ring; empty when head == tail */
486 static inline struct tip_subbuf *
487 subbuf_fifo_dequeue(struct thread_information *tip)
489 const int head = tip->fifo.head;
490 const int next = (head + 1) & (FIFO_SIZE - 1);
492 if (head != tip->fifo.tail) {
493 struct tip_subbuf *ts = tip->fifo.q[head];
496 tip->fifo.head = next;
/* push one sub-buffer onto the ring; complains loudly when full */
503 static inline int subbuf_fifo_queue(struct thread_information *tip,
504 struct tip_subbuf *ts)
506 const int tail = tip->fifo.tail;
507 const int next = (tail + 1) & (FIFO_SIZE - 1);
509 if (next != tip->fifo.head) {
510 tip->fifo.q[tail] = ts;
512 tip->fifo.tail = next;
516 fprintf(stderr, "fifo too small!\n");
521 * For file output, truncate and mmap the file appropriately
523 static int mmap_subbuf(struct thread_information *tip, unsigned int maxlen)
525 int ofd = fileno(tip->ofile);
529 * extend file, if we have to. use chunks of 16 subbuffers.
531 if (tip->fs_off + buf_size > tip->fs_buf_len) {
533 munlock(tip->fs_buf, tip->fs_buf_len);
534 munmap(tip->fs_buf, tip->fs_buf_len);
/* keep the mapping page-aligned: fs_off is the offset within the first page */
538 tip->fs_off = tip->fs_size & (page_size - 1);
539 tip->fs_buf_len = (16 * buf_size) - tip->fs_off;
540 tip->fs_max_size += tip->fs_buf_len;
542 if (ftruncate(ofd, tip->fs_max_size) < 0) {
547 tip->fs_buf = mmap(NULL, tip->fs_buf_len, PROT_WRITE,
548 MAP_SHARED, ofd, tip->fs_size - tip->fs_off);
549 if (tip->fs_buf == MAP_FAILED) {
/* NOTE(review): mlock return value ignored — presumably best-effort */
553 mlock(tip->fs_buf, tip->fs_buf_len);
/* read trace data straight into the mapped window */
556 ret = tip->read_data(tip, tip->fs_buf + tip->fs_off, maxlen);
558 tip->data_read += ret;
568 * Use the copy approach for pipes and network
570 static int get_subbuf(struct thread_information *tip, unsigned int maxlen)
572 struct tip_subbuf *ts = malloc(sizeof(*ts));
575 ts->buf = malloc(buf_size);
576 ts->max_len = maxlen;
578 ret = tip->read_data(tip, ts->buf, ts->max_len);
581 tip->data_read += ret;
582 if (subbuf_fifo_queue(tip, ts))
/* sendfile variant: only the length is queued; the data itself stays in the
 * relay file and is shipped later by flush_subbuf_sendfile() */
589 static int get_subbuf_sendfile(struct thread_information *tip,
592 struct tip_subbuf *ts;
596 wait_for_data(tip, POLLMSG);
599 * hack to get last data out, we can't use sendfile for that
602 return get_subbuf(tip, maxlen);
604 if (fstat(tip->fd, &sb) < 0) {
605 perror("trace stat");
/* bytes available = current file size minus what was already queued */
608 ready = sb.st_size - tip->data_queued;
614 ts = malloc(sizeof(*ts));
618 tip->data_queued += ready;
620 if (subbuf_fifo_queue(tip, ts))
/* release per-thread output resources */
626 static void close_thread(struct thread_information *tip)
632 if (tip->ofile_buffer)
633 free(tip->ofile_buffer);
639 tip->ofile_buffer = NULL;
/* for mmap output: unmap and shrink the file back to bytes actually written */
643 static void tip_ftrunc_final(struct thread_information *tip)
646 * truncate to right size and cleanup mmap
648 if (tip->ofile_mmap && tip->ofile) {
649 int ofd = fileno(tip->ofile);
652 munmap(tip->fs_buf, tip->fs_buf_len);
654 ftruncate(ofd, tip->fs_size);
/* per-cpu reader thread: pin to its cpu, open the relay trace file, and
 * pull sub-buffers via the configured get_subbuf hook until tracing stops */
658 static void *thread_main(void *arg)
660 struct thread_information *tip = arg;
661 pid_t pid = getpid();
665 CPU_SET((tip->cpu), &cpu_mask);
667 if (sched_setaffinity(pid, sizeof(cpu_mask), &cpu_mask) == -1) {
668 perror("sched_setaffinity");
672 snprintf(tip->fn, sizeof(tip->fn), "%s/block/%s/trace%d",
673 relay_path, tip->device->buts_name, tip->cpu);
674 tip->fd = open(tip->fn, O_RDONLY);
677 fprintf(stderr,"Thread %d failed open of %s\n", tip->cpu,
683 if (tip->get_subbuf(tip, buf_size) < 0)
688 * trace is stopped, pull data until we get a short read
690 while (tip->get_subbuf(tip, buf_size) > 0)
693 tip_ftrunc_final(tip);
/* send() loop until the whole buffer has been shipped */
698 static int write_data_net(int fd, void *buf, unsigned int buf_len)
700 unsigned int bytes_left = buf_len;
704 ret = send(fd, buf, bytes_left, 0);
/* ship a blktrace_net_hdr announcing `len` payload bytes for this cpu */
717 static int net_send_header(struct thread_information *tip, unsigned int len)
719 struct blktrace_net_hdr hdr;
721 hdr.magic = BLK_IO_TRACE_MAGIC;
722 strcpy(hdr.buts_name, tip->device->buts_name);
724 hdr.max_cpus = ncpus;
727 return write_data_net(net_out_fd, &hdr, sizeof(hdr));
731 * send header with 0 length to signal end-of-run
733 static void net_client_send_close(void)
735 struct device_information *dip;
736 struct blktrace_net_hdr hdr;
739 for_each_dip(dip, i) {
740 hdr.magic = BLK_IO_TRACE_MAGIC;
741 hdr.max_cpus = ncpus;
/* cpu field is overloaded to carry the dropped-event count on close */
743 strcpy(hdr.buts_name, dip->buts_name);
744 hdr.cpu = get_dropped_count(dip->buts_name);
746 write_data_net(net_out_fd, &hdr, sizeof(hdr));
/* network flush: header first, then the sub-buffer payload */
751 static int flush_subbuf_net(struct thread_information *tip,
752 struct tip_subbuf *ts)
754 if (net_send_header(tip, ts->len))
756 if (write_data_net(net_out_fd, ts->buf, ts->len))
/* zero-copy ship of ts->len bytes straight from the relay fd */
764 static int net_sendfile(struct thread_information *tip, struct tip_subbuf *ts)
766 int ret = sendfile(net_out_fd, tip->fd, NULL, ts->len);
771 } else if (ret < (int) ts->len) {
772 fprintf(stderr, "short sendfile send (%d of %d)\n", ret, ts->len);
779 static int flush_subbuf_sendfile(struct thread_information *tip,
780 struct tip_subbuf *ts)
785 * currently we cannot use sendfile() on the last bytes read, as they
786 * may not be a full subbuffer. get_subbuf_sendfile() falls back to
787 * the read approach for those, so use send() to ship them out
790 return flush_subbuf_net(tip, ts);
792 if (net_send_header(tip, ts->len))
794 if (net_sendfile(tip, ts))
797 tip->data_read += ts->len;
798 tip->ofile_offset += buf_size;
/* buffered fwrite of one chunk to the thread's output file */
805 static int write_data(struct thread_information *tip, void *buf,
806 unsigned int buf_len)
814 ret = fwrite(buf, buf_len, 1, tip->ofile);
824 if (tip->ofile_stdout)
/* file flush: write out only complete trace records; any trailing partial
 * record is stashed in leftover_ts and prepended to the next sub-buffer */
830 static int flush_subbuf_file(struct thread_information *tip,
831 struct tip_subbuf *ts)
833 unsigned int offset = 0;
834 struct blk_io_trace *t;
835 int pdu_len, events = 0;
838 * surplus from last run
840 if (tip->leftover_ts) {
841 struct tip_subbuf *prev_ts = tip->leftover_ts;
843 if (prev_ts->len + ts->len > prev_ts->max_len) {
844 prev_ts->max_len += ts->len;
/* NOTE(review): realloc result unchecked — leaks/crashes on OOM */
845 prev_ts->buf = realloc(prev_ts->buf, prev_ts->max_len);
848 memcpy(prev_ts->buf + prev_ts->len, ts->buf, ts->len);
849 prev_ts->len += ts->len;
855 tip->leftover_ts = NULL;
/* walk complete records; stop when a record would spill past ts->len */
858 while (offset + sizeof(*t) <= ts->len) {
859 t = ts->buf + offset;
861 if (verify_trace(t)) {
862 write_data(tip, ts->buf, offset);
866 pdu_len = t->pdu_len;
868 if (offset + sizeof(*t) + pdu_len > ts->len)
871 offset += sizeof(*t) + pdu_len;
872 tip->events_processed++;
873 tip->data_read += sizeof(*t) + pdu_len;
877 if (write_data(tip, ts->buf, offset))
881 * leftover bytes, save them for next time
883 if (offset != ts->len) {
884 tip->leftover_ts = ts;
886 memmove(ts->buf, ts->buf + offset, ts->len);
/* drain one queued sub-buffer from the tip, if any, and flush it */
895 static int write_tip_events(struct thread_information *tip)
897 struct tip_subbuf *ts = subbuf_fifo_dequeue(tip);
900 return tip->flush_subbuf(tip, ts);
906 * scans the tips we know and writes out the subbuffers we accumulate
908 static void get_and_write_events(void)
910 struct device_information *dip;
911 struct thread_information *tip;
912 int i, j, events, ret, tips_running;
917 for_each_dip(dip, i) {
918 for_each_tip(dip, tip, j) {
919 ret = write_tip_events(tip);
/* second pass: keep draining until fifos are empty and threads have exited */
935 for_each_dip(dip, i) {
936 for_each_tip(dip, tip, j) {
937 ret = write_tip_events(tip);
940 tips_running += !tip->exited;
944 } while (events || tips_running);
947 static void wait_for_threads(void)
950 * for piped or network output, poll and fetch data for writeout.
951 * for files, we just wait around for trace threads to exit
953 if ((output_name && !strcmp(output_name, "-")) ||
954 net_mode == Net_client)
955 get_and_write_events();
957 struct device_information *dip;
958 struct thread_information *tip;
959 int i, j, tips_running;
966 for_each_tip(dip, tip, j)
967 tips_running += !tip->exited;
968 } while (tips_running);
971 if (net_mode == Net_client)
972 net_client_send_close();
/* build the output path: [output_dir/][client-timestamp/]name.blktrace.cpu;
 * in server mode also creates the per-connection directory.
 * NOTE(review): sprintf into dst is unbounded — snprintf would be safer */
975 static int fill_ofname(struct device_information *dip,
976 struct thread_information *tip, char *dst,
983 len = sprintf(dst, "%s/", output_dir);
985 if (net_mode == Net_server) {
986 struct net_connection *nc = dip->nc;
988 len += sprintf(dst + len, "%s-", inet_ntoa(nc->cl_in_addr));
989 len += strftime(dst + len, 64, "%F-%T/", gmtime(&nc->connect_time));
992 if (stat(dst, &sb) < 0) {
993 if (errno != ENOENT) {
997 if (mkdir(dst, 0755) < 0) {
999 fprintf(stderr, "Can't make output dir\n");
1005 sprintf(dst + len, "%s.blktrace.%d", output_name, tip->cpu);
1007 sprintf(dst + len, "%s.blktrace.%d", buts_name, tip->cpu);
/* choose the get/flush/read strategy hooks by output and network mode */
1012 static void fill_ops(struct thread_information *tip)
1017 if (net_mode == Net_client) {
1018 if (net_use_sendfile) {
1019 tip->get_subbuf = get_subbuf_sendfile;
1020 tip->flush_subbuf = flush_subbuf_sendfile;
1022 tip->get_subbuf = get_subbuf;
1023 tip->flush_subbuf = flush_subbuf_net;
1026 if (tip->ofile_mmap)
1027 tip->get_subbuf = mmap_subbuf;
1029 tip->get_subbuf = get_subbuf;
1031 tip->flush_subbuf = flush_subbuf_file;
1034 if (net_mode == Net_server)
1035 tip->read_data = read_data_net;
1037 tip->read_data = read_data_file;
/* open stdout, a pipe, or a per-cpu file, and configure stdio buffering */
1040 static int tip_open_output(struct device_information *dip,
1041 struct thread_information *tip)
1043 int pipeline = output_name && !strcmp(output_name, "-");
1044 int mode, vbuf_size;
1047 if (net_mode == Net_client) {
1049 tip->ofile_stdout = 0;
1050 tip->ofile_mmap = 0;
1052 } else if (pipeline) {
1053 tip->ofile = fdopen(STDOUT_FILENO, "w");
1054 tip->ofile_stdout = 1;
1055 tip->ofile_mmap = 0;
1059 if (fill_ofname(dip, tip, op, dip->buts_name))
1061 tip->ofile = fopen(op, "w+");
1062 tip->ofile_stdout = 0;
1063 tip->ofile_mmap = 1;
1065 vbuf_size = OFILE_BUF;
1068 if (tip->ofile == NULL) {
1073 tip->ofile_buffer = malloc(vbuf_size);
1074 if (setvbuf(tip->ofile, tip->ofile_buffer, mode, vbuf_size)) {
/* init per-cpu state and spawn one reader thread per cpu for this device */
1085 static int start_threads(struct device_information *dip)
1087 struct thread_information *tip;
1090 for_each_tip(dip, tip, j) {
1093 tip->events_processed = 0;
1095 memset(&tip->fifo, 0, sizeof(tip->fifo));
1096 tip->leftover_ts = NULL;
1098 if (tip_open_output(dip, tip))
1101 if (pthread_create(&tip->thread, NULL, thread_main, tip)) {
1102 perror("pthread_create");
/* join all reader threads of one device */
1111 static void stop_threads(struct device_information *dip)
1113 struct thread_information *tip;
1117 for_each_tip(dip, tip, i) {
1118 (void) pthread_join(tip->thread, (void *) &ret);
1123 static void stop_all_threads(void)
1125 struct device_information *dip;
1128 for_each_dip(dip, i)
1132 static void stop_all_tracing(void)
1134 struct device_information *dip;
1137 for_each_dip(dip, i)
/* common exit path: stop tracing first if it hasn't been stopped yet */
1141 static void exit_trace(int status)
1143 if (!is_trace_stopped()) {
/* grow the global device table by one and record the new device path */
1152 static int resize_devices(char *path)
1154 int size = (ndevs + 1) * sizeof(struct device_information);
1156 device_information = realloc(device_information, size);
1157 if (!device_information) {
1158 fprintf(stderr, "Out of memory, device %s (%d)\n", path, size);
1161 device_information[ndevs].path = path;
/* open every traced device read-only / non-blocking */
1166 static int open_devices(void)
1168 struct device_information *dip;
1171 for_each_dip(dip, i) {
1172 dip->fd = open(dip->path, O_RDONLY | O_NONBLOCK);
/* allocate the per-device thread tables, start kernel tracing, then worker
 * threads; unwinds already-started devices on failure */
1182 static int start_devices(void)
1184 struct device_information *dip;
1187 size = ncpus * sizeof(struct thread_information);
1188 thread_information = malloc(size * ndevs);
1189 if (!thread_information) {
1190 fprintf(stderr, "Out of memory, threads (%d)\n", size * ndevs);
1194 for_each_dip(dip, i) {
1195 if (start_trace(dip)) {
1197 fprintf(stderr, "Failed to start trace on %s\n",
1204 __for_each_dip(dip, device_information, i, j)
1210 for_each_dip(dip, i) {
1211 dip->threads = thread_information + (i * ncpus);
1212 if (start_threads(dip)) {
1213 fprintf(stderr, "Failed to start worker threads\n");
1219 __for_each_dip(dip, device_information, i, j)
1221 for_each_dip(dip, i)
/* print per-cpu and per-device event/data totals; warn when events dropped */
1230 static void show_stats(struct device_information *dips, int ndips, int cpus)
1232 struct device_information *dip;
1233 struct thread_information *tip;
1234 unsigned long long events_processed, data_read;
1235 unsigned long total_drops;
1236 int i, j, no_stdout = 0;
1238 if (is_stat_shown())
/* suppress per-cpu lines when trace data itself goes to stdout */
1241 if (output_name && !strcmp(output_name, "-"))
1247 __for_each_dip(dip, dips, ndips, i) {
1249 printf("Device: %s\n", dip->path);
1250 events_processed = 0;
1252 __for_each_tip(dip, tip, cpus, j) {
1254 printf(" CPU%3d: %20lu events, %8llu KiB data\n",
1255 tip->cpu, tip->events_processed,
1256 (tip->data_read + 1023) >> 10);
1257 events_processed += tip->events_processed;
1258 data_read += tip->data_read;
1260 total_drops += dip->drop_count;
1262 printf(" Total: %20llu events (dropped %lu), %8llu KiB data\n",
1263 events_processed, dip->drop_count,
1264 (data_read + 1023) >> 10);
1268 fprintf(stderr, "You have dropped events, consider using a larger buffer size (-b)\n");
/* find (or lazily create) the server-side device entry for a trace name */
1271 static struct device_information *net_get_dip(struct net_connection *nc,
1274 struct device_information *dip;
1277 for (i = 0; i < nc->ndevs; i++) {
1278 dip = &nc->device_information[i];
1280 if (!strcmp(dip->buts_name, buts_name))
/* NOTE(review): realloc/malloc results unchecked — crashes on OOM */
1284 nc->device_information = realloc(nc->device_information, (nc->ndevs + 1) * sizeof(*dip));
1285 dip = &nc->device_information[nc->ndevs];
1286 memset(dip, 0, sizeof(*dip));
1289 strcpy(dip->buts_name, buts_name);
1290 dip->path = strdup(buts_name);
1291 dip->trace_started = 1;
1293 dip->threads = malloc(nc->ncpus * sizeof(struct thread_information));
1294 memset(dip->threads, 0, nc->ncpus * sizeof(struct thread_information));
1299 for (i = 0; i < nc->ncpus; i++) {
1300 struct thread_information *tip = &dip->threads[i];
1306 if (tip_open_output(dip, tip))
/* map an incoming header to its per-cpu thread slot */
1313 static struct thread_information *net_get_tip(struct net_connection *nc,
1314 struct blktrace_net_hdr *bnh)
1316 struct device_information *dip;
1318 nc->ncpus = bnh->max_cpus;
1319 dip = net_get_dip(nc, bnh->buts_name);
1320 if (!dip->trace_started) {
1321 fprintf(stderr, "Events for closed devices %s\n", dip->buts_name);
1325 return &dip->threads[bnh->cpu];
/* read one on-wire header; fd made non-blocking so we can honor is_done() */
1328 static int net_get_header(struct net_connection *nc,
1329 struct blktrace_net_hdr *bnh)
1331 int fl = fcntl(nc->in_fd, F_GETFL);
1332 int bytes_left, ret;
1335 fcntl(nc->in_fd, F_SETFL, fl | O_NONBLOCK);
1336 bytes_left = sizeof(*bnh);
1337 while (bytes_left && !is_done()) {
1338 ret = recv(nc->in_fd, p, bytes_left, MSG_WAITALL);
1340 if (errno != EAGAIN) {
1341 perror("recv header");
/* restore blocking mode before returning */
1354 fcntl(nc->in_fd, F_SETFL, fl & ~O_NONBLOCK);
1359 * finalize a net client: truncate files, show stats, cleanup, etc
1361 static void net_client_done(struct net_connection *nc)
1363 struct device_information *dip;
1364 struct thread_information *tip;
1365 struct net_connection *last_nc;
1368 for_each_nc_dip(nc, dip, i)
1369 __for_each_tip(dip, tip, nc->ncpus, j)
1370 tip_ftrunc_final(tip);
1372 show_stats(nc->device_information, nc->ndevs, nc->ncpus);
1375 * cleanup for next run
1377 for_each_nc_dip(nc, dip, i) {
1378 __for_each_tip(dip, tip, nc->ncpus, j) {
1387 free(nc->device_information);
1388 nc->device_information = NULL;
1389 nc->ncpus = nc->ndevs = 0;
1397 * now put last entry where this one was, a little nasty since we
1398 * need to adjust dip->nc as well
1400 if (nc->connection_index != net_connects) {
1401 last_nc = &net_connections[net_connects];
1403 for_each_nc_dip(nc, dip, i)
1411 * handle incoming events from a net client
1413 static int net_client_data(struct net_connection *nc)
1415 struct thread_information *tip;
1416 struct blktrace_net_hdr bnh;
1418 if (net_get_header(nc, &bnh))
/* first header seen decides byte order for the whole session */
1421 if (data_is_native == -1 && check_data_endianness(bnh.magic)) {
1422 fprintf(stderr, "server: received data is bad\n");
1426 if (!data_is_native) {
1427 bnh.magic = be32_to_cpu(bnh.magic);
1428 bnh.cpu = be32_to_cpu(bnh.cpu);
1429 bnh.len = be32_to_cpu(bnh.len);
1432 if ((bnh.magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) {
1433 fprintf(stderr, "server: bad data magic\n");
1438 * len == 0 means that the other end signalled end-of-run
1442 * overload cpu count with dropped events
1444 struct device_information *dip;
1446 dip = net_get_dip(nc, bnh.buts_name);
1447 dip->drop_count = bnh.cpu;
1448 dip->trace_started = 0;
1450 printf("server: end of run for %s\n", dip->buts_name);
1451 net_client_done(nc);
1455 tip = net_get_tip(nc, &bnh);
/* write the payload straight into the mmap'ed output file */
1459 if (mmap_subbuf(tip, bnh.len))
/* accept a new client; record its address and connect time for fill_ofname */
1465 static void net_add_connection(int listen_fd, struct sockaddr_in *addr)
1467 socklen_t socklen = sizeof(*addr);
1468 struct net_connection *nc;
1470 if (net_connects == NET_MAX_CONNECTIONS) {
1471 fprintf(stderr, "server: no more connections allowed\n");
1475 nc = &net_connections[net_connects];
1476 memset(nc, 0, sizeof(*nc));
1478 nc->in_fd = accept(listen_fd, (struct sockaddr *) addr, &socklen);
1479 if (nc->in_fd < 0) {
1484 printf("server: connection from %s\n", inet_ntoa(addr->sin_addr));
1485 time(&nc->connect_time);
1486 nc->connection_index = net_connects;
1487 nc->cl_in_addr = addr->sin_addr;
1492 * event driven loop, handle new incoming connections and data from
1493 * existing connections
1495 static void net_server_handle_connections(int listen_fd,
1496 struct sockaddr_in *addr)
1498 struct pollfd pfds[NET_MAX_CONNECTIONS + 1];
1501 printf("server: waiting for connections...\n");
1503 while (!is_done()) {
1505 * the zero entry is for incoming connections, remaining
1506 * entries for clients
1508 pfds[0].fd = listen_fd;
1509 pfds[0].events = POLLIN;
1510 for (i = 0; i < net_connects; i++) {
1511 pfds[i + 1].fd = net_connections[i].in_fd;
1512 pfds[i + 1].events = POLLIN;
1515 events = poll(pfds, 1 + net_connects, -1);
1525 if (pfds[0].revents & POLLIN) {
1526 net_add_connection(listen_fd, addr);
1530 for (i = 0; events && i < net_connects; i++) {
1531 if (pfds[i + 1].revents & POLLIN) {
1532 net_client_data(&net_connections[i]);
1540 * Start here when we are in server mode - just fetch data from the network
1543 static int net_server(void)
1545 struct sockaddr_in addr;
1548 fd = socket(AF_INET, SOCK_STREAM, 0);
1550 perror("server: socket");
1555 if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) < 0) {
1556 perror("setsockopt");
1560 memset(&addr, 0, sizeof(addr));
1561 addr.sin_family = AF_INET;
1562 addr.sin_addr.s_addr = htonl(INADDR_ANY);
1563 addr.sin_port = htons(net_port);
1565 if (bind(fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
1570 if (listen(fd, 1) < 0) {
1575 net_server_handle_connections(fd, &addr);
1580 * Setup outgoing network connection where we will transmit data
1582 static int net_setup_client(void)
1584 struct sockaddr_in addr;
1587 fd = socket(AF_INET, SOCK_STREAM, 0);
1589 perror("client: socket");
1593 memset(&addr, 0, sizeof(addr));
1594 addr.sin_family = AF_INET;
1595 addr.sin_port = htons(net_port);
/* try dotted-quad first, then fall back to DNS resolution */
1597 if (inet_aton(hostname, &addr.sin_addr) != 1) {
1598 struct hostent *hent = gethostbyname(hostname);
1600 perror("gethostbyname");
1604 memcpy(&addr.sin_addr, hent->h_addr, 4);
1605 strcpy(hostname, hent->h_name);
1608 printf("blktrace: connecting to %s\n", hostname);
1610 if (connect(fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
1611 perror("client: connect");
1615 printf("blktrace: connected!\n");
/* command line usage text; keep in sync with S_OPTS and l_opts */
1620 static char usage_str[] = \
1621 "-d <dev> [ -r relay path ] [ -o <output> ] [-k ] [ -w time ]\n" \
1622 "[ -a action ] [ -A action mask ] [ -v ]\n\n" \
1623 "\t-d Use specified device. May also be given last after options\n" \
1624 "\t-r Path to mounted relayfs, defaults to /relay\n" \
1625 "\t-o File(s) to send output to\n" \
1626 "\t-D Directory to prepend to output file names\n" \
1627 "\t-k Kill a running trace\n" \
1628 "\t-w Stop after defined time, in seconds\n" \
1629 "\t-a Only trace specified actions. See documentation\n" \
1630 "\t-A Give trace mask as a single value. See documentation\n" \
1631 "\t-b Sub buffer size in KiB\n" \
1632 "\t-n Number of sub buffers\n" \
1633 "\t-l Run in network listen mode (blktrace server)\n" \
1634 "\t-h Run in network client mode, connecting to the given host\n" \
1635 "\t-p Network port to use (default 8462)\n" \
1636 "\t-s Make the network client use sendfile() to transfer data\n" \
1637 "\t-V Print program version info\n\n";
1639 static void show_usage(char *program)
1641 fprintf(stderr, "Usage: %s %s %s",program, blktrace_version, usage_str);
/* entry point: parse options, validate the relayfs mount, start tracing on
 * all devices, run until signalled (or stop-watch expires), report stats */
1644 int main(int argc, char *argv[])
1646 static char default_relay_path[] = "/relay";
1650 int act_mask_tmp = 0;
1652 while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) >= 0) {
1655 i = find_mask_map(optarg);
1657 fprintf(stderr,"Invalid action mask %s\n",
1665 if ((sscanf(optarg, "%x", &i) != 1) ||
1666 !valid_act_opt(i)) {
1668 "Invalid set action mask %s/0x%x\n",
1676 if (resize_devices(optarg) != 0)
1681 relay_path = optarg;
1685 output_name = optarg;
1688 kill_running_trace = 1;
1691 stop_watch = atoi(optarg);
1692 if (stop_watch <= 0) {
1694 "Invalid stopwatch value (%d secs)\n",
1700 printf("%s version %s\n", argv[0], blktrace_version);
1703 buf_size = strtoul(optarg, NULL, 10);
1704 if (buf_size <= 0 || buf_size > 16*1024) {
1706 "Invalid buffer size (%lu)\n",buf_size);
1712 buf_nr = strtoul(optarg, NULL, 10);
1715 "Invalid buffer nr (%lu)\n", buf_nr);
1720 output_dir = optarg;
/* NOTE(review): strcpy into fixed hostname[] — overflow risk for long args */
1723 net_mode = Net_client;
1724 strcpy(hostname, optarg);
1727 net_mode = Net_server;
1730 net_port = atoi(optarg);
1733 net_use_sendfile = 1;
1736 show_usage(argv[0]);
1741 setlocale(LC_NUMERIC, "en_US");
1743 page_size = getpagesize();
1745 if (net_mode == Net_server)
1746 return net_server();
/* remaining positional arguments are device paths */
1748 while (optind < argc) {
1749 if (resize_devices(argv[optind++]) != 0)
1754 show_usage(argv[0]);
1759 relay_path = default_relay_path;
1761 if (act_mask_tmp != 0)
1762 act_mask = act_mask_tmp;
/* sanity-check that relay_path really is a mounted relayfs */
1764 if (statfs(relay_path, &st) < 0) {
1766 fprintf(stderr,"%s does not appear to be a valid path\n",
1769 } else if (st.f_type != (long) RELAYFS_TYPE) {
1770 fprintf(stderr,"%s does not appear to be a relay filesystem\n",
1775 if (open_devices() != 0)
1778 if (kill_running_trace) {
1783 ncpus = sysconf(_SC_NPROCESSORS_ONLN);
1785 fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed\n");
1789 signal(SIGINT, handle_sigint);
1790 signal(SIGHUP, handle_sigint);
1791 signal(SIGTERM, handle_sigint);
1792 signal(SIGALRM, handle_sigint);
1794 if (net_mode == Net_client && net_setup_client())
1797 if (start_devices() != 0)
1800 atexit(stop_all_tracing);
1807 if (!is_trace_stopped()) {
1813 show_stats(device_information, ndevs, ncpus);