2 * block queue tracing application
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 #include <sys/types.h>
29 #include <sys/ioctl.h>
30 #include <sys/param.h>
31 #include <sys/statfs.h>
34 #include <sys/socket.h>
41 #include <netinet/in.h>
42 #include <arpa/inet.h>
44 #include <sys/sendfile.h>
/* version string reported by -V and in the usage banner */
49 static char blktrace_version[] = "0.99";
52 * You may want to increase this even more, if you are logging at a high
53 * rate and see skipped/missed events
/* default relay sub-buffer size in bytes; overridable via -b (KiB) */
55 #define BUF_SIZE (512 * 1024)
/* stdio buffer size handed to setvbuf() for regular output files */
58 #define OFILE_BUF (128 * 1024)
/* relayfs superblock magic, checked against statfs() f_type in main() */
60 #define RELAYFS_TYPE 0xF0B4A981
/* short-option string; must stay in sync with the l_opts[] table below */
62 #define S_OPTS "d:a:A:r:o:kw:Vb:n:D:lh:p:s"
/*
 * long-option table for getopt_long(); pairs with S_OPTS above.
 * NOTE(review): most .name/.val members are on lines elided from this
 * view — only the visible entries are documented here.
 */
63 static struct option l_opts[] = {
66 .has_arg = required_argument,
72 .has_arg = required_argument,
78 .has_arg = required_argument,
84 .has_arg = required_argument,
90 .has_arg = required_argument,
96 .has_arg = no_argument,
102 .has_arg = required_argument,
108 .has_arg = no_argument,
113 .name = "buffer-size",
114 .has_arg = required_argument,
119 .name = "num-sub-buffers",
120 .has_arg = required_argument,
125 .name = "output-dir",
126 .has_arg = required_argument,
132 .has_arg = no_argument,
138 .has_arg = required_argument,
144 .has_arg = required_argument,
150 .has_arg = no_argument,
/* allocated capacity of a captured sub-buffer (struct tip_subbuf) */
162 unsigned int max_len;
166 #define FIFO_SIZE (1024) /* should be plenty big! */
167 #define CL_SIZE (128) /* cache line, any bigger? */
/*
 * single-producer/single-consumer ring of sub-buffers passed from the
 * per-cpu reader thread to the writeout loop; head and tail are forced
 * onto separate cache lines to avoid false sharing between the two sides
 */
169 struct tip_subbuf_fifo {
170 int tail __attribute__((aligned(CL_SIZE)));
171 int head __attribute__((aligned(CL_SIZE)));
172 struct tip_subbuf *q[FIFO_SIZE];
/* per-cpu trace reader state; one instance per (device, cpu) pair */
175 struct thread_information {
/* path of the per-cpu relay trace file this thread reads */
181 char fn[MAXPATHLEN + 64];
/* client address; used by the server to name per-client output files */
186 struct in_addr cl_in_addr;
/* strategy hooks, selected in fill_ops() based on output/net mode */
194 int (*get_subbuf)(struct thread_information *, unsigned int);
195 int (*flush_subbuf)(struct thread_information *, struct tip_subbuf *);
196 int (*read_data)(struct thread_information *, void *, unsigned int);
/* accounting totals reported by show_stats() */
198 unsigned long events_processed;
199 unsigned long long data_read;
200 struct device_information *device;
207 struct tip_subbuf_fifo fifo;
/* partial trace event saved by flush_subbuf_file() for the next flush */
208 struct tip_subbuf *leftover_ts;
211 * mmap controlled output files
213 unsigned long long fs_size;
214 unsigned long long fs_max_size;
215 unsigned long fs_off;
217 unsigned long fs_buf_len;
/* one traced block device plus its array of per-cpu reader threads */
220 struct device_information {
/* set/cleared via dip_set_tracing(); read through dip_tracing() */
224 volatile int trace_started;
/* dropped-event count snapshot taken when tracing stops */
225 unsigned long drop_count;
226 struct thread_information *threads;
230 static struct thread_information *thread_information;
232 static struct device_information *device_information;
234 /* command line option globals */
235 static char *relay_path;
236 static char *output_name;
237 static char *output_dir;
238 static int act_mask = ~0U;
239 static int kill_running_trace;
240 static unsigned long buf_size = BUF_SIZE;
241 static unsigned long buf_nr = BUF_NR;
242 static unsigned int page_size;
/*
 * run-state flags written from signal handlers; the volatile casts in
 * the accessor macros force a fresh load on every check
 */
244 #define is_done() (*(volatile int *)(&done))
245 static volatile int done;
247 #define is_trace_stopped() (*(volatile int *)(&trace_stopped))
248 static volatile int trace_stopped;
250 #define is_stat_shown() (*(volatile int *)(&stat_shown))
251 static volatile int stat_shown;
/* -1 until the first net header arrives, then set by endianness check */
253 int data_is_native = -1;
255 static void exit_trace(int status);
257 #define dip_tracing(dip) (*(volatile int *)(&(dip)->trace_started))
258 #define dip_set_tracing(dip, v) ((dip)->trace_started = (v))
/* iterate all devices, or one device's per-cpu threads */
260 #define __for_each_dip(__d, __i, __e) \
261 for (__i = 0, __d = device_information; __i < __e; __i++, __d++)
263 #define for_each_dip(__d, __i) __for_each_dip(__d, __i, ndevs)
264 #define for_each_tip(__d, __t, __j) \
265 for (__j = 0, __t = (__d)->threads; __j < ncpus; __j++, __t++)
268 * networking stuff follows. we include a magic number so we know whether
269 * to endianness convert or not
/*
 * on-the-wire header preceding each sub-buffer sent client -> server;
 * magic doubles as an endianness probe (see net_server_loop())
 */
271 struct blktrace_net_hdr {
272 u32 magic; /* same as trace magic */
273 char buts_name[32]; /* trace name */
274 u32 cpu; /* for which cpu */
276 u32 len; /* length of following trace data */
279 #define TRACE_NET_PORT (8462)
288 * network cmd line params
290 static char hostname[MAXHOSTNAMELEN];
291 static int net_port = TRACE_NET_PORT;
292 static int net_mode = 0;
293 static int net_use_sendfile;
/* connected socket fds; -1 while unconnected */
295 static int net_in_fd = -1;
296 static int net_out_fd = -1;
/*
 * signal handler for INT/HUP/TERM/ALRM: stop the kernel side of every
 * trace so already-produced relay data can still be reaped by the
 * reader threads before we exit
 */
298 static void handle_sigint(__attribute__((__unused__)) int sig)
300 struct device_information *dip;
304 * stop trace so we can reap currently produced data
306 for_each_dip(dip, i) {
309 if (ioctl(dip->fd, BLKTRACESTOP) < 0)
310 perror("BLKTRACESTOP");
/*
 * read the dropped-event count the kernel exposes under
 * <relay_path>/block/<buts_name>/dropped; a missing file is tolerated
 * because older kernels do not support dropped counts
 */
316 static int get_dropped_count(const char *buts_name)
319 char tmp[MAXPATHLEN + 64];
321 snprintf(tmp, sizeof(tmp), "%s/block/%s/dropped",
322 relay_path, buts_name);
324 fd = open(tmp, O_RDONLY);
327 * this may be ok, if the kernel doesn't support dropped counts
332 fprintf(stderr, "Couldn't open dropped file %s\n", tmp);
/* tmp is deliberately reused as the read buffer here */
336 if (read(fd, tmp, sizeof(tmp)) < 0) {
/*
 * fetch the padding recorded for sub-buffer 'subbuf' by reading the
 * whole per-subbuffer padding array from the trace's .padding file
 * into tip->pfd_buf and indexing it
 */
347 static size_t get_subbuf_padding(struct thread_information *tip,
350 size_t padding_size = buf_nr * sizeof(size_t);
353 if (read(tip->pfd, tip->pfd_buf, padding_size) < 0) {
354 perror("tip pad read");
357 ret = tip->pfd_buf[subbuf];
/*
 * set up and start kernel-side tracing on one device via the
 * BLKTRACESETUP/BLKTRACESTART ioctls; records the kernel-assigned
 * buts name and flags the device as actively tracing
 */
362 static int start_trace(struct device_information *dip)
364 struct blk_user_trace_setup buts;
366 memset(&buts, 0, sizeof(buts));
367 buts.buf_size = buf_size;
368 buts.buf_nr = buf_nr;
369 buts.act_mask = act_mask;
371 if (ioctl(dip->fd, BLKTRACESETUP, &buts) < 0) {
372 perror("BLKTRACESETUP");
376 if (ioctl(dip->fd, BLKTRACESTART) < 0) {
377 perror("BLKTRACESTART");
/* kernel fills in buts.name on SETUP; remember it for path building */
381 memcpy(dip->buts_name, buts.name, sizeof(dip->buts_name));
382 dip_set_tracing(dip, 1);
/*
 * stop and tear down kernel tracing for one device; the STOP error is
 * deliberately ignored since the trace may already be stopped
 */
386 static void stop_trace(struct device_information *dip)
388 if (dip_tracing(dip) || kill_running_trace) {
389 dip_set_tracing(dip, 0);
392 * should be stopped, just don't complain if it isn't
394 ioctl(dip->fd, BLKTRACESTOP);
396 if (ioctl(dip->fd, BLKTRACETEARDOWN) < 0)
397 perror("BLKTRACETEARDOWN");
/*
 * stop every device's trace, snapshotting its dropped-event count
 * first so show_stats() can report it afterwards
 */
404 static void stop_all_traces(void)
406 struct device_information *dip;
409 for_each_dip(dip, i) {
410 dip->drop_count = get_dropped_count(dip->buts_name);
/*
 * block in poll() on the per-cpu relay file until data is available
 * (POLLIN) or the run is done; the stdout case gets extra handling on
 * lines elided from this view — presumably a flush, TODO confirm
 */
415 static void wait_for_data(struct thread_information *tip)
417 struct pollfd pfd = { .fd = tip->fd, .events = POLLIN };
421 if (pfd.revents & POLLIN)
423 if (tip->ofile_stdout)
425 } while (!is_done());
/*
 * read up to 'len' bytes from the per-cpu relay file, retrying on
 * EAGAIN until data arrives or the run is done; any other errno is
 * reported as a fatal per-thread read failure
 */
428 static int read_data_file(struct thread_information *tip, void *buf,
436 ret = read(tip->fd, buf, len);
442 if (errno != EAGAIN) {
444 fprintf(stderr,"Thread %d failed read of %s\n",
450 } while (!is_done());
/*
 * server side: receive up to 'len' bytes from the client socket,
 * looping on EAGAIN until all bytes arrive or we are told to stop;
 * returns the number of bytes actually consumed
 */
456 static int read_data_net(struct thread_information *tip, void *buf,
459 unsigned int bytes_left = len;
463 ret = recv(net_in_fd, buf, bytes_left, MSG_WAITALL);
468 if (errno != EAGAIN) {
470 fprintf(stderr, "server: failed read\n");
478 } while (!is_done() && bytes_left);
480 return len - bytes_left;
/* dispatch to the file- or net-backed reader chosen by fill_ops() */
483 static int read_data(struct thread_information *tip, void *buf,
486 return tip->read_data(tip, buf, len);
/*
 * consumer side of the sub-buffer ring: when non-empty, take the entry
 * at head and advance head (power-of-two wrap via FIFO_SIZE - 1)
 */
489 static inline struct tip_subbuf *
490 subbuf_fifo_dequeue(struct thread_information *tip)
492 const int head = tip->fifo.head;
493 const int next = (head + 1) & (FIFO_SIZE - 1);
495 if (head != tip->fifo.tail) {
496 struct tip_subbuf *ts = tip->fifo.q[head];
499 tip->fifo.head = next;
/*
 * producer side of the sub-buffer ring: append 'ts' at tail unless the
 * ring would become full (next == head means full for this scheme)
 */
506 static inline int subbuf_fifo_queue(struct thread_information *tip,
507 struct tip_subbuf *ts)
509 const int tail = tip->fifo.tail;
510 const int next = (tail + 1) & (FIFO_SIZE - 1);
512 if (next != tip->fifo.head) {
513 tip->fifo.q[tail] = ts;
515 tip->fifo.tail = next;
519 fprintf(stderr, "fifo too small!\n");
524 * For file output, truncate and mmap the file appropriately
/*
 * grow the output file in chunks of 16 sub-buffers, mmap (and mlock)
 * the fresh window, then read trace data directly into the mapping —
 * avoids a copy compared to the fifo/flush path
 */
526 static int mmap_subbuf(struct thread_information *tip, unsigned int maxlen)
528 int ofd = fileno(tip->ofile);
532 * extend file, if we have to. use chunks of 16 subbuffers.
534 if (tip->fs_off + buf_size > tip->fs_buf_len) {
536 munlock(tip->fs_buf, tip->fs_buf_len);
537 munmap(tip->fs_buf, tip->fs_buf_len);
/* keep the new mapping page-aligned; fs_off is the in-page remainder */
541 tip->fs_off = tip->fs_size & (page_size - 1);
542 tip->fs_buf_len = (16 * buf_size) - tip->fs_off;
543 tip->fs_max_size += tip->fs_buf_len;
545 if (ftruncate(ofd, tip->fs_max_size) < 0) {
550 tip->fs_buf = mmap(NULL, tip->fs_buf_len, PROT_WRITE,
551 MAP_SHARED, ofd, tip->fs_size - tip->fs_off);
552 if (tip->fs_buf == MAP_FAILED) {
556 mlock(tip->fs_buf, tip->fs_buf_len);
559 ret = read_data(tip, tip->fs_buf + tip->fs_off, maxlen);
561 tip->data_read += ret;
571 * Use the copy approach for pipes and network
/*
 * allocate a tip_subbuf, copy up to maxlen bytes of trace data into it
 * and queue it on the fifo for the writeout loop to flush
 */
573 static int get_subbuf(struct thread_information *tip, unsigned int maxlen)
575 struct tip_subbuf *ts = malloc(sizeof(*ts));
578 ts->buf = malloc(buf_size);
579 ts->max_len = maxlen;
581 ret = read_data(tip, ts->buf, ts->max_len);
584 tip->data_read += ret;
585 if (subbuf_fifo_queue(tip, ts))
/*
 * sendfile variant: instead of copying data, fstat() the relay file to
 * learn how many bytes are ready past ofile_offset and queue only an
 * (offset, len) descriptor; flush_subbuf_sendfile() ships it later.
 * Falls back to the copy path (get_subbuf) for the final partial data.
 */
592 static int get_subbuf_sendfile(struct thread_information *tip,
595 struct tip_subbuf *ts;
597 unsigned int ready, this_size, total;
602 * hack to get last data out, we can't use sendfile for that
605 return get_subbuf(tip, maxlen);
607 if (fstat(tip->fd, &sb) < 0) {
608 perror("trace stat");
612 ready = sb.st_size - tip->ofile_offset;
615 * delay a little, since poll() will return data available
616 * until sendfile() is run
/* ship at most one sub-buffer's worth per descriptor */
622 this_size = buf_size;
625 if (this_size > ready)
628 ts = malloc(sizeof(*ts));
634 ts->offset = tip->ofile_offset;
635 tip->ofile_offset += ts->len;
637 if (subbuf_fifo_queue(tip, ts))
/*
 * release one thread's output resources; frees the setvbuf buffer if
 * tip_open_output() allocated one
 */
646 static void close_thread(struct thread_information *tip)
654 if (tip->ofile_buffer)
655 free(tip->ofile_buffer);
664 tip->ofile_buffer = NULL;
/*
 * for mmap-driven output files, unmap the final window and truncate
 * the file down to the actual amount of data written (fs_size) —
 * mmap_subbuf() grows the file in 16-subbuffer chunks ahead of time
 */
668 static void tip_ftrunc_final(struct thread_information *tip)
671 * truncate to right size and cleanup mmap
673 if (tip->ofile_mmap) {
674 int ofd = fileno(tip->ofile);
677 munmap(tip->fs_buf, tip->fs_buf_len);
679 ftruncate(ofd, tip->fs_size);
/*
 * per-cpu reader thread body: pin to its CPU, open the relay trace
 * file (and the .padding file when the client uses sendfile), then
 * pull sub-buffers via the configured get_subbuf hook until the run
 * ends, drain any remainder, and finalize the output file
 */
683 static void *thread_main(void *arg)
685 struct thread_information *tip = arg;
686 pid_t pid = getpid();
690 CPU_SET((tip->cpu), &cpu_mask);
692 if (sched_setaffinity(pid, sizeof(cpu_mask), &cpu_mask) == -1) {
693 perror("sched_setaffinity");
697 snprintf(tip->fn, sizeof(tip->fn), "%s/block/%s/trace%d",
698 relay_path, tip->device->buts_name, tip->cpu);
699 tip->fd = open(tip->fn, O_RDONLY);
702 fprintf(stderr,"Thread %d failed open of %s\n", tip->cpu,
/* sendfile needs the kernel's per-subbuffer padding information */
707 if (net_mode == Net_client && net_use_sendfile) {
708 char tmp[MAXPATHLEN + 64];
710 snprintf(tmp, sizeof(tmp), "%s/block/%s/trace%d.padding",
711 relay_path, tip->device->buts_name, tip->cpu);
713 tip->pfd = open(tmp, O_RDONLY);
715 fprintf(stderr, "Couldn't open padding file %s\n", tmp);
719 tip->pfd_buf = malloc(buf_nr * sizeof(size_t));
723 if (tip->get_subbuf(tip, buf_size) < 0)
728 * trace is stopped, pull data until we get a short read
730 while (tip->get_subbuf(tip, buf_size) > 0)
733 tip_ftrunc_final(tip);
/* send() loop pushing buf_len bytes of 'buf' out over the socket */
738 static int write_data_net(int fd, void *buf, unsigned int buf_len)
740 unsigned int bytes_left = buf_len;
744 ret = send(fd, buf, bytes_left, 0);
/*
 * transmit a blktrace_net_hdr announcing 'len' bytes of trace data
 * for this thread's device/cpu (cpu and len fields are set on lines
 * elided from this view)
 */
757 static int net_send_header(struct thread_information *tip, unsigned int len)
759 struct blktrace_net_hdr hdr;
761 hdr.magic = BLK_IO_TRACE_MAGIC;
762 strcpy(hdr.buts_name, tip->device->buts_name);
764 hdr.max_cpus = ncpus;
767 return write_data_net(net_out_fd, &hdr, sizeof(hdr));
771 * send header with 0 length to signal end-of-run
773 static void net_client_send_close(void)
775 struct blktrace_net_hdr hdr;
777 hdr.magic = BLK_IO_TRACE_MAGIC;
779 hdr.max_cpus = ncpus;
/* len stays 0 (set on an elided line) — the server treats that as EOF */
782 write_data_net(net_out_fd, &hdr, sizeof(hdr));
/* ship one copied sub-buffer over the net: header first, then payload */
785 static int flush_subbuf_net(struct thread_information *tip,
786 struct tip_subbuf *ts)
788 if (net_send_header(tip, ts->len))
790 if (write_data_net(net_out_fd, ts->buf, ts->len))
/*
 * zero-copy transmit: sendfile() ts->len bytes out of the relay fd
 * starting at ts->offset (sendfile advances ts->offset for us)
 */
798 static int net_sendfile(struct thread_information *tip, struct tip_subbuf *ts)
800 unsigned int bytes_left = ts->len;
803 while (bytes_left && !is_done()) {
804 ret = sendfile(net_out_fd, tip->fd, &ts->offset, bytes_left);
/*
 * flush one queued sub-buffer via sendfile; the sub-buffer's trailing
 * padding (from the kernel's .padding file) is looked up so only real
 * data is announced/sent. Copied (non-sendfile) sub-buffers from the
 * end-of-run fallback go through flush_subbuf_net() instead.
 */
820 static int flush_subbuf_sendfile(struct thread_information *tip,
821 struct tip_subbuf *ts)
827 * currently we cannot use sendfile() on the last bytes read, as they
828 * may not be a full subbuffer. get_subbuf_sendfile() falls back to
829 * the read approach for those, so use send() to ship them out
832 return flush_subbuf_net(tip, ts);
/* which sub-buffer of the ring this offset falls into */
834 subbuf = (ts->offset / buf_size) % buf_nr;
835 padding = get_subbuf_padding(tip, subbuf);
838 if (net_send_header(tip, ts->len))
840 if (net_sendfile(tip, ts))
843 tip->data_read += ts->len;
/*
 * write buf_len bytes to this thread's stdio output stream; the
 * stdout case gets extra handling on elided lines — presumably an
 * fflush so piped consumers see data promptly, TODO confirm
 */
848 static int write_data(struct thread_information *tip, void *buf,
849 unsigned int buf_len)
857 ret = fwrite(buf, buf_len, 1, tip->ofile);
867 if (tip->ofile_stdout)
/*
 * file/pipe flush path: walk the sub-buffer as a stream of
 * blk_io_trace records (header + pdu_len payload), write out all the
 * complete events, and stash any trailing partial event in
 * tip->leftover_ts so it can be prepended to the next sub-buffer
 */
873 static int flush_subbuf_file(struct thread_information *tip,
874 struct tip_subbuf *ts)
876 unsigned int offset = 0;
877 struct blk_io_trace *t;
878 int pdu_len, events = 0;
881 * surplus from last run
883 if (tip->leftover_ts) {
884 struct tip_subbuf *prev_ts = tip->leftover_ts;
/* grow the saved buffer so this sub-buffer can be appended to it */
886 if (prev_ts->len + ts->len > prev_ts->max_len) {
887 prev_ts->max_len += ts->len;
/* NOTE(review): realloc result overwrites prev_ts->buf directly —
 * the original pointer is lost on OOM; error path is on elided
 * lines, TODO confirm it aborts */
888 prev_ts->buf = realloc(prev_ts->buf, prev_ts->max_len);
891 memcpy(prev_ts->buf + prev_ts->len, ts->buf, ts->len);
892 prev_ts->len += ts->len;
898 tip->leftover_ts = NULL;
/* parse complete events; stop when a header would run off the end */
901 while (offset + sizeof(*t) <= ts->len) {
902 t = ts->buf + offset;
904 if (verify_trace(t)) {
905 write_data(tip, ts->buf, offset);
909 pdu_len = t->pdu_len;
911 if (offset + sizeof(*t) + pdu_len > ts->len)
914 offset += sizeof(*t) + pdu_len;
915 tip->events_processed++;
916 tip->data_read += sizeof(*t) + pdu_len;
920 if (write_data(tip, ts->buf, offset))
924 * leftover bytes, save them for next time
926 if (offset != ts->len) {
927 tip->leftover_ts = ts;
/* ts->len is adjusted to the leftover size on an elided line */
929 memmove(ts->buf, ts->buf + offset, ts->len);
/* dequeue one sub-buffer and flush it via the configured strategy */
938 static int write_tip_events(struct thread_information *tip)
940 struct tip_subbuf *ts = subbuf_fifo_dequeue(tip);
943 return tip->flush_subbuf(tip, ts);
949 * scans the tips we know and writes out the subbuffers we accumulate
/*
 * writeout loop for piped/networked output: keep draining every
 * thread's fifo; the second pass after the main loop exits makes sure
 * data queued by still-running threads is not lost before they exit
 */
951 static void get_and_write_events(void)
953 struct device_information *dip;
954 struct thread_information *tip;
955 int i, j, events, ret, tips_running;
960 for_each_dip(dip, i) {
961 for_each_tip(dip, tip, j) {
962 ret = write_tip_events(tip);
978 for_each_dip(dip, i) {
979 for_each_tip(dip, tip, j) {
980 ret = write_tip_events(tip);
983 tips_running += !tip->exited;
987 } while (events || tips_running);
/*
 * wait for the run to finish: piped ("-") or net-client output drives
 * the writeout loop; plain file output just spins until every reader
 * thread has exited. A client then tells the server the run is over.
 */
990 static void wait_for_threads(void)
993 * for piped or network output, poll and fetch data for writeout.
994 * for files, we just wait around for trace threads to exit
996 if ((output_name && !strcmp(output_name, "-")) ||
997 net_mode == Net_client)
998 get_and_write_events();
1000 struct device_information *dip;
1001 struct thread_information *tip;
1002 int i, j, tips_running;
1008 for_each_dip(dip, i)
1009 for_each_tip(dip, tip, j)
1010 tips_running += !tip->exited;
1011 } while (tips_running);
1014 if (net_mode == Net_client)
1015 net_client_send_close();
/*
 * build the output file path into 'dst': optional output_dir prefix,
 * then (server mode only) a per-client "<ip>-<date>-<time>/" directory
 * which is created on demand, then "<name>.blktrace.<cpu>" using the
 * -o name if given, else the kernel's buts name
 */
1018 static int fill_ofname(struct thread_information *tip, char *dst,
1026 len = sprintf(dst, "%s/", output_dir);
1028 if (net_mode == Net_server) {
1029 len += sprintf(dst + len, "%s-", inet_ntoa(tip->cl_in_addr));
1031 len += strftime(dst + len, 64, "%F-%T/", gmtime(&t));
1034 if (stat(dst, &sb) < 0) {
1035 if (errno != ENOENT) {
1039 if (mkdir(dst, 0755) < 0) {
1041 fprintf(stderr, "Can't make output dir\n");
1047 sprintf(dst + len, "%s.blktrace.%d", output_name, tip->cpu);
1049 sprintf(dst + len, "%s.blktrace.%d", buts_name, tip->cpu);
/*
 * choose this thread's strategy hooks: net clients ship sub-buffers
 * (sendfile or copy), local output either mmaps the file directly or
 * copies via the fifo; servers read from the socket, everyone else
 * from the relay file
 */
1054 static void fill_ops(struct thread_information *tip)
1059 if (net_mode == Net_client) {
1060 if (net_use_sendfile) {
1061 tip->get_subbuf = get_subbuf_sendfile;
1062 tip->flush_subbuf = flush_subbuf_sendfile;
1064 tip->get_subbuf = get_subbuf;
1065 tip->flush_subbuf = flush_subbuf_net;
1068 if (tip->ofile_mmap)
1069 tip->get_subbuf = mmap_subbuf;
1071 tip->get_subbuf = get_subbuf;
1073 tip->flush_subbuf = flush_subbuf_file;
1076 if (net_mode == Net_server)
1077 tip->read_data = read_data_net;
1079 tip->read_data = read_data_file;
/*
 * open this thread's output: nothing for a net client (data goes to
 * the socket), stdout for "-", otherwise a regular file (mmap-driven)
 * named by fill_ofname(); regular files also get a large setvbuf
 * buffer to cut syscall overhead
 */
1082 static int tip_open_output(struct device_information *dip,
1083 struct thread_information *tip)
1085 int pipeline = output_name && !strcmp(output_name, "-");
1086 int mode, vbuf_size;
1089 if (net_mode == Net_client) {
1091 tip->ofile_stdout = 0;
1092 tip->ofile_mmap = 0;
1094 } else if (pipeline) {
1095 tip->ofile = fdopen(STDOUT_FILENO, "w");
1096 tip->ofile_stdout = 1;
1097 tip->ofile_mmap = 0;
1101 if (fill_ofname(tip, op, dip->buts_name))
1103 tip->ofile = fopen(op, "w+");
1104 tip->ofile_stdout = 0;
1105 tip->ofile_mmap = 1;
1107 vbuf_size = OFILE_BUF;
1110 if (tip->ofile == NULL) {
1115 tip->ofile_buffer = malloc(vbuf_size);
1116 if (setvbuf(tip->ofile, tip->ofile_buffer, mode, vbuf_size)) {
/*
 * for one device: reset each per-cpu thread's state, open its output
 * and spawn thread_main(); returns nonzero on the first failure
 */
1127 static int start_threads(struct device_information *dip)
1129 struct thread_information *tip;
1132 for_each_tip(dip, tip, j) {
1135 tip->events_processed = 0;
1138 memset(&tip->fifo, 0, sizeof(tip->fifo));
1139 tip->leftover_ts = NULL;
1141 if (tip_open_output(dip, tip))
1144 if (pthread_create(&tip->thread, NULL, thread_main, tip)) {
1145 perror("pthread_create");
/* join every per-cpu reader thread belonging to this device */
1154 static void stop_threads(struct device_information *dip)
1156 struct thread_information *tip;
1160 for_each_tip(dip, tip, i) {
1161 (void) pthread_join(tip->thread, (void *) &ret);
/* stop_threads() over every traced device */
1166 static void stop_all_threads(void)
1168 struct device_information *dip;
1171 for_each_dip(dip, i)
/* stop_trace() over every traced device; registered via atexit() */
1175 static void stop_all_tracing(void)
1177 struct device_information *dip;
1180 for_each_dip(dip, i)
/*
 * orderly shutdown entry point: if tracing is still live, stop it
 * before exiting with 'status' (remainder of the body elided here)
 */
1184 static void exit_trace(int status)
1186 if (!is_trace_stopped()) {
/*
 * append one device path to the global device_information array,
 * growing it by a single slot; failure to allocate is fatal for the
 * run, so losing the old pointer in realloc is acceptable here
 */
1195 static int resize_devices(char *path)
1197 int size = (ndevs + 1) * sizeof(struct device_information);
1199 device_information = realloc(device_information, size);
1200 if (!device_information) {
1201 fprintf(stderr, "Out of memory, device %s (%d)\n", path, size);
1204 device_information[ndevs].path = path;
/*
 * open every requested block device read-only/non-blocking; the fd is
 * only used for the BLKTRACE* ioctls, never for reading data
 */
1209 static int open_devices(void)
1211 struct device_information *dip;
1214 for_each_dip(dip, i) {
1215 dip->fd = open(dip->path, O_RDONLY | O_NONBLOCK);
/*
 * allocate the ndevs*ncpus thread array, start the kernel trace on
 * every device, then spawn the per-cpu threads; on any failure the
 * __for_each_dip(.., i) passes roll back only the devices that were
 * already started
 */
1225 static int start_devices(void)
1227 struct device_information *dip;
1230 size = ncpus * sizeof(struct thread_information);
1231 thread_information = malloc(size * ndevs);
1232 if (!thread_information) {
1233 fprintf(stderr, "Out of memory, threads (%d)\n", size * ndevs);
1237 for_each_dip(dip, i) {
1238 if (start_trace(dip)) {
1240 fprintf(stderr, "Failed to start trace on %s\n",
1247 __for_each_dip(dip, j, i)
1253 for_each_dip(dip, i) {
1254 dip->threads = thread_information + (i * ncpus);
1255 if (start_threads(dip)) {
1256 fprintf(stderr, "Failed to start worker threads\n");
1262 __for_each_dip(dip, j, i)
1264 for_each_dip(dip, i)
/*
 * print per-cpu and per-device event/data totals plus dropped counts;
 * suppressed when output went to stdout (would corrupt piped data) and
 * guarded so it only runs once per process
 */
1273 static void show_stats(void)
1275 struct device_information *dip;
1276 struct thread_information *tip;
1277 unsigned long long events_processed, data_read;
1278 unsigned long total_drops;
1279 int i, j, no_stdout = 0;
1281 if (is_stat_shown())
1284 if (output_name && !strcmp(output_name, "-"))
1290 for_each_dip(dip, i) {
1292 printf("Device: %s\n", dip->path);
1293 events_processed = 0;
1295 for_each_tip(dip, tip, j) {
/* (x + 1023) >> 10 rounds bytes up to whole KiB */
1297 printf(" CPU%3d: %20lu events, %8llu KiB data\n",
1298 tip->cpu, tip->events_processed,
1299 (tip->data_read + 1023) >> 10);
1300 events_processed += tip->events_processed;
1301 data_read += tip->data_read;
1303 total_drops += dip->drop_count;
1305 printf(" Total: %20llu events (dropped %lu), %8llu KiB data\n",
1306 events_processed, dip->drop_count,
1307 (data_read + 1023) >> 10);
1311 fprintf(stderr, "You have dropped events, consider using a larger buffer size (-b)\n");
/*
 * server side: find the device entry matching 'buts_name', or create
 * one — growing the device array, initializing ncpus thread slots and
 * opening each cpu's output file, tagged with the client's address
 */
1314 static struct device_information *net_get_dip(char *buts_name,
1315 struct in_addr *cl_in_addr)
1317 struct device_information *dip;
1320 for (i = 0; i < ndevs; i++) {
1321 dip = &device_information[i];
1323 if (!strcmp(dip->buts_name, buts_name))
/* not found: append a fresh entry */
1327 device_information = realloc(device_information, (ndevs + 1) * sizeof(*dip));
1328 dip = &device_information[ndevs];
1329 memset(dip, 0, sizeof(*dip));
1331 strcpy(dip->buts_name, buts_name);
1332 dip->path = strdup(buts_name);
1334 dip->threads = malloc(ncpus * sizeof(struct thread_information));
1335 memset(dip->threads, 0, ncpus * sizeof(struct thread_information));
1340 for (i = 0; i < ncpus; i++) {
1341 struct thread_information *tip = &dip->threads[i];
1347 tip->cl_in_addr = *cl_in_addr;
1349 if (tip_open_output(dip, tip))
/*
 * map an incoming net header to its thread slot: adopt the client's
 * cpu count, then look up (or create) the device and index by cpu
 */
1356 static struct thread_information *net_get_tip(struct blktrace_net_hdr *bnh,
1357 struct in_addr *cl_in_addr)
1359 struct device_information *dip;
1361 ncpus = bnh->max_cpus;
1362 dip = net_get_dip(bnh->buts_name, cl_in_addr);
1363 return &dip->threads[bnh->cpu];
/*
 * receive one complete blktrace_net_hdr; the socket is temporarily put
 * in non-blocking mode so the loop can honor is_done(), and the
 * original flags are restored before returning
 */
1366 static int net_get_header(struct blktrace_net_hdr *bnh)
1368 int fl = fcntl(net_in_fd, F_GETFL);
1369 int bytes_left, ret;
1372 fcntl(net_in_fd, F_SETFL, fl | O_NONBLOCK);
1373 bytes_left = sizeof(*bnh);
1374 while (bytes_left && !is_done()) {
1375 ret = recv(net_in_fd, p, bytes_left, MSG_WAITALL);
1377 if (errno != EAGAIN) {
1378 perror("recv header");
1391 fcntl(net_in_fd, F_SETFL, fl & ~O_NONBLOCK);
/*
 * one server iteration: read a header, detect/fix endianness from the
 * magic on the first packet, validate the magic, treat len == 0 as the
 * client's end-of-run signal, otherwise receive the payload straight
 * into the corresponding thread's mmap'ed output file
 */
1395 static int net_server_loop(struct in_addr *cl_in_addr)
1397 struct thread_information *tip;
1398 struct blktrace_net_hdr bnh;
1400 if (net_get_header(&bnh))
1403 if (data_is_native == -1 && check_data_endianness(bnh.magic)) {
1404 fprintf(stderr, "server: received data is bad\n");
1408 if (!data_is_native) {
1409 bnh.magic = be32_to_cpu(bnh.magic);
1410 bnh.cpu = be32_to_cpu(bnh.cpu);
1411 bnh.len = be32_to_cpu(bnh.len);
/* low byte of the magic carries the format version; mask it off */
1414 if ((bnh.magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) {
1415 fprintf(stderr, "server: bad data magic\n");
1420 * len == 0 means that the other end signalled end-of-run
1423 fprintf(stderr, "server: end of run\n");
1427 tip = net_get_tip(&bnh, cl_in_addr);
1431 if (mmap_subbuf(tip, bnh.len))
1438 * Start here when we are in server mode - just fetch data from the network
1441 static int net_server(void)
1443 struct device_information *dip;
1444 struct thread_information *tip;
1445 struct sockaddr_in addr;
1449 fd = socket(AF_INET, SOCK_STREAM, 0);
1451 perror("server: socket");
1456 if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) < 0) {
1457 perror("setsockopt");
1461 memset(&addr, 0, sizeof(addr));
1462 addr.sin_family = AF_INET;
1463 addr.sin_addr.s_addr = htonl(INADDR_ANY);
1464 addr.sin_port = htons(net_port);
1466 if (bind(fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
1471 if (listen(fd, 1) < 0) {
1477 signal(SIGINT, NULL);
1478 signal(SIGHUP, NULL);
1479 signal(SIGTERM, NULL);
1480 signal(SIGALRM, NULL);
1482 printf("blktrace: waiting for incoming connection...\n");
1484 socklen = sizeof(addr);
1485 net_in_fd = accept(fd, (struct sockaddr *) &addr, &socklen);
1486 if (net_in_fd < 0) {
1491 signal(SIGINT, handle_sigint);
1492 signal(SIGHUP, handle_sigint);
1493 signal(SIGTERM, handle_sigint);
1494 signal(SIGALRM, handle_sigint);
1496 printf("blktrace: connection from %s\n", inet_ntoa(addr.sin_addr));
1498 while (!is_done()) {
1499 if (net_server_loop(&addr.sin_addr))
1503 for_each_dip(dip, i)
1504 for_each_tip(dip, tip, j)
1505 tip_ftrunc_final(tip);
1513 * cleanup for next run
1515 for_each_dip(dip, i) {
1516 for_each_tip(dip, tip, j)
1523 free(device_information);
1524 device_information = NULL;
1534 * Setup outgoing network connection where we will transmit data
/*
 * client side: resolve 'hostname' (dotted quad first, then DNS) and
 * connect a TCP socket to the server on net_port
 */
1536 static int net_setup_client(void)
1538 struct sockaddr_in addr;
1541 fd = socket(AF_INET, SOCK_STREAM, 0);
1543 perror("client: socket");
1547 memset(&addr, 0, sizeof(addr));
1548 addr.sin_family = AF_INET;
1549 addr.sin_port = htons(net_port);
/* not a literal IP — fall back to a hostname lookup */
1551 if (inet_aton(hostname, &addr.sin_addr) != 1) {
1552 struct hostent *hent = gethostbyname(hostname);
1554 perror("gethostbyname");
1558 memcpy(&addr.sin_addr, hent->h_addr, 4);
1559 strcpy(hostname, hent->h_name);
1562 printf("blktrace: connecting to %s\n", hostname);
1564 if (connect(fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
1565 perror("client: connect");
1569 printf("blktrace: connected!\n");
/* option summary printed by show_usage(); keep in sync with S_OPTS/l_opts */
1574 static char usage_str[] = \
1575 "-d <dev> [ -r relay path ] [ -o <output> ] [-k ] [ -w time ]\n" \
1576 "[ -a action ] [ -A action mask ] [ -v ]\n\n" \
1577 "\t-d Use specified device. May also be given last after options\n" \
1578 "\t-r Path to mounted relayfs, defaults to /relay\n" \
1579 "\t-o File(s) to send output to\n" \
1580 "\t-D Directory to prepend to output file names\n" \
1581 "\t-k Kill a running trace\n" \
1582 "\t-w Stop after defined time, in seconds\n" \
1583 "\t-a Only trace specified actions. See documentation\n" \
1584 "\t-A Give trace mask as a single value. See documentation\n" \
1585 "\t-b Sub buffer size in KiB\n" \
1586 "\t-n Number of sub buffers\n" \
1587 "\t-l Run in network listen mode (blktrace server)\n" \
1588 "\t-h Run in network client mode, connecting to the given host\n" \
1589 "\t-p Network port to use (default 8462)\n" \
1590 "\t-s Make the network client use sendfile() to transfer data\n" \
1591 "\t-V Print program version info\n\n";
/* print program name, version and the option summary to stderr */
1593 static void show_usage(char *program)
1595 fprintf(stderr, "Usage: %s %s %s",program, blktrace_version, usage_str);
/*
 * entry point: parse options, dispatch to net_server() in listen mode,
 * validate the relayfs mount, open devices, install signal handlers,
 * connect as a net client if requested, then start tracing (the tail
 * of the function is not visible in this view)
 */
1598 int main(int argc, char *argv[])
1600 static char default_relay_path[] = "/relay";
1604 int act_mask_tmp = 0;
/* option parsing; cases below are fragments of one switch on 'c' */
1606 while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) >= 0) {
1609 i = find_mask_map(optarg);
1611 fprintf(stderr,"Invalid action mask %s\n",
1619 if ((sscanf(optarg, "%x", &i) != 1) ||
1620 !valid_act_opt(i)) {
1622 "Invalid set action mask %s/0x%x\n",
1630 if (resize_devices(optarg) != 0)
1635 relay_path = optarg;
1639 output_name = optarg;
1642 kill_running_trace = 1;
1645 stop_watch = atoi(optarg);
1646 if (stop_watch <= 0) {
1648 "Invalid stopwatch value (%d secs)\n",
1654 printf("%s version %s\n", argv[0], blktrace_version);
1657 buf_size = strtoul(optarg, NULL, 10);
1658 if (buf_size <= 0 || buf_size > 16*1024) {
1660 "Invalid buffer size (%lu)\n",buf_size);
1666 buf_nr = strtoul(optarg, NULL, 10);
1669 "Invalid buffer nr (%lu)\n", buf_nr);
1674 output_dir = optarg;
1677 net_mode = Net_client;
1678 strcpy(hostname, optarg);
1681 net_mode = Net_server;
1684 net_port = atoi(optarg);
1687 net_use_sendfile = 1;
1690 show_usage(argv[0]);
1695 setlocale(LC_NUMERIC, "en_US");
1697 page_size = getpagesize();
/* server mode never traces locally — hand over and exit with its status */
1699 if (net_mode == Net_server)
1700 return net_server();
/* remaining non-option args are additional devices to trace */
1702 while (optind < argc) {
1703 if (resize_devices(argv[optind++]) != 0)
1708 show_usage(argv[0]);
1713 relay_path = default_relay_path;
1715 if (act_mask_tmp != 0)
1716 act_mask = act_mask_tmp;
/* sanity-check that relay_path really is a mounted relayfs */
1718 if (statfs(relay_path, &st) < 0) {
1720 fprintf(stderr,"%s does not appear to be a valid path\n",
1723 } else if (st.f_type != (long) RELAYFS_TYPE) {
1724 fprintf(stderr,"%s does not appear to be a relay filesystem\n",
1729 if (open_devices() != 0)
1732 if (kill_running_trace) {
1737 ncpus = sysconf(_SC_NPROCESSORS_ONLN);
1739 fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed\n");
1743 signal(SIGINT, handle_sigint);
1744 signal(SIGHUP, handle_sigint);
1745 signal(SIGTERM, handle_sigint);
1746 signal(SIGALRM, handle_sigint);
1748 if (net_mode == Net_client && net_setup_client())
1751 if (start_devices() != 0)
1754 atexit(stop_all_tracing);
1761 if (!is_trace_stopped()) {