2 * block queue tracing application
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5 * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
7 * Rewrite to have a single thread per CPU (managing all devices on that CPU)
8 * Alan D. Brunelle <alan.brunelle@hp.com> - January 2009
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
39 #include <sys/ioctl.h>
40 #include <sys/types.h>
44 #include <sys/param.h>
46 #include <sys/resource.h>
47 #include <sys/socket.h>
48 #include <netinet/in.h>
49 #include <arpa/inet.h>
51 #include <sys/sendfile.h>
57 * You may want to increase this even more, if you are logging at a high
58 * rate and see skipped/missed events
60 #define BUF_SIZE (512 * 1024)
63 #define FILE_VBUF_SIZE (128 * 1024)
65 #define DEBUGFS_TYPE (0x64626720)
66 #define TRACE_NET_PORT (8462)
75 * Generic stats collected: nevents can be _roughly_ estimated by data_read
76 * (discounting pdu...)
78 * These fields are updated w/ pdc_dr_update & pdc_nev_update below.
81 unsigned long long data_read;
82 unsigned long long nevents;
86 struct list_head head;
87 char *path; /* path to device special file */
88 char *buts_name; /* name returned from bt kernel code */
89 struct pdc_stats *stats;
91 unsigned long long drops;
94 * For piped output only:
96 * Each tracer will have a tracer_devpath_head that it will add new
97 * data onto. Its list is protected above (tracer_devpath_head.mutex)
98 * and it will signal the processing thread using the dp_cond,
99 * dp_mutex & dp_entries variables above.
101 struct tracer_devpath_head *heads;
104 * For network server mode only:
108 time_t cl_connect_time;
113 * For piped output to stdout we will have each tracer thread (one per dev)
114 * tack buffers read from the relay queues on a per-device list.
116 * The main thread will then collect trace buffers from each of the lists in turn.
118 * We will use a mutex to guard each of the trace_buf list. The tracers
119 * can then signal the main thread using <dp_cond,dp_mutex> and
120 * dp_entries. (When dp_entries is 0, and a tracer adds an entry it will
121 * signal. When dp_entries is 0, the main thread will wait for that condition
124 * adb: It may be better just to have a large buffer per tracer per dev,
125 * and then use it as a ring-buffer. This would certainly cut down a lot
126 * of malloc/free thrashing, at the cost of more memory movements (potentially).
129 struct list_head head;
135 struct tracer_devpath_head {
136 pthread_mutex_t mutex;
137 struct list_head head;
138 struct trace_buf *prev;
142 * Used to handle the mmap() interfaces for output file (containing traces)
146 unsigned long long fs_size, fs_max_size, fs_off, fs_buf_len;
147 unsigned long buf_size, buf_nr;
152 * Each thread doing work on a (client) side of blktrace will have one
153 * of these. The ios array contains input/output information, pfds holds
154 * poll() data. The volatile's provide flags to/from the main executing
158 struct list_head head;
162 pthread_mutex_t mutex;
165 volatile int running, status, is_done;
169 * networking stuff follows. we include a magic number so we know whether
170 * to endianness convert or not.
172 * The len field is overloaded:
173 * 0 - Indicates an "open" - allowing the server to set up for a dev/cpu
174 * 1 - Indicates a "close" - Shut down connection orderly
176 * The cpu field is overloaded on close: it will contain the number of drops.
178 struct blktrace_net_hdr {
179 u32 magic; /* same as trace magic */
180 char buts_name[32]; /* trace name */
181 u32 cpu; /* for which cpu */
183 u32 len; /* length of following trace data */
184 u32 cl_id; /* id for set of client per-cpu connections */
185 u32 buf_size; /* client buf_size for this trace */
186 u32 buf_nr; /* client buf_nr for this trace */
187 u32 page_size; /* client page_size for this trace */
191 * Each host encountered has one of these. The head is used to link this
192 * on to the network server's ch_list. Connections associated with this
193 * host are linked on conn_list, and any devices traced on that host
194 * are connected on the devpaths list.
197 struct list_head head;
198 struct list_head conn_list;
199 struct list_head devpaths;
200 struct net_server_s *ns;
202 struct in_addr cl_in_addr;
203 int connects, ndevs, cl_opens;
207 * Each connection (client to server socket ('fd')) has one of these. A
208 * back reference to the host ('ch'), and list headers (for the host
209 * list, and the network server conn_list) are also included.
212 struct list_head ch_head, ns_head;
219 * The network server requires some poll structures to be maintained -
220 * one per connection currently on conn_list. The nchs/ch_list values
221 * are for each host connected to this server. The addr field is used
222 * for scratch as new connections are established.
224 struct net_server_s {
225 struct list_head conn_list;
226 struct list_head ch_list;
228 int listen_fd, connects, nchs;
229 struct sockaddr_in addr;
233 * This structure is (generically) used to provide information
234 * for a read-to-write set of values.
236 * ifn & ifd represent input information
238 * ofn, ofd, ofp, obuf & mmap_info are used for output file (optionally).
244 struct cl_conn *nc; /* Server network connection */
247 * mmap controlled output files
249 struct mmap_info mmap_info;
252 * Client network fields
255 unsigned long long data_queued;
258 * Input/output file descriptors & names
261 char ifn[MAXPATHLEN + 64];
262 char ofn[MAXPATHLEN + 64];
265 static char blktrace_version[] = "2.0.0";
268 * Linkage to blktrace helper routines (trace conversions)
270 int data_is_native = -1;
274 static int act_mask = ~0U;
275 static char *debugfs_path = "/sys/kernel/debug";
276 static char *output_name;
277 static char *output_dir;
278 static int kill_running_trace;
279 static int stop_watch;
280 static unsigned long buf_size = BUF_SIZE;
281 static unsigned long buf_nr = BUF_NR;
282 static LIST_HEAD(devpaths);
283 static LIST_HEAD(tracers);
285 static volatile int done;
287 static int piped_output;
290 static pthread_cond_t dp_cond = PTHREAD_COND_INITIALIZER;
291 static pthread_mutex_t dp_mutex = PTHREAD_MUTEX_INITIALIZER;
292 static volatile int dp_entries;
295 * network cmd line params
297 static char hostname[MAXHOSTNAMELEN];
298 static int net_port = TRACE_NET_PORT;
299 static int net_use_sendfile = 1;
303 static int (*handle_pfds)(struct tracer *, int, int);
304 static int (*handle_list)(struct tracer_devpath_head *, struct list_head *);
306 #define S_OPTS "d:a:A:r:o:kw:vVb:n:D:lh:p:sI:"
307 static struct option l_opts[] = {
310 .has_arg = required_argument,
315 .name = "input-devs",
316 .has_arg = required_argument,
322 .has_arg = required_argument,
328 .has_arg = required_argument,
334 .has_arg = required_argument,
340 .has_arg = required_argument,
346 .has_arg = no_argument,
352 .has_arg = required_argument,
358 .has_arg = no_argument,
364 .has_arg = no_argument,
369 .name = "buffer-size",
370 .has_arg = required_argument,
375 .name = "num-sub-buffers",
376 .has_arg = required_argument,
381 .name = "output-dir",
382 .has_arg = required_argument,
388 .has_arg = no_argument,
394 .has_arg = required_argument,
400 .has_arg = required_argument,
405 .name = "no-sendfile",
406 .has_arg = no_argument,
415 static char usage_str[] = \
416 "-d <dev> [ -r debugfs path ] [ -o <output> ] [-k ] [ -w time ]\n" \
417 "[ -a action ] [ -A action mask ] [ -I <devs file> ] [ -v ]\n\n" \
418 "\t-d Use specified device. May also be given last after options\n" \
419 "\t-r Path to mounted debugfs, defaults to /sys/kernel/debug\n" \
420 "\t-o File(s) to send output to\n" \
421 "\t-D Directory to prepend to output file names\n" \
422 "\t-k Kill a running trace\n" \
423 "\t-w Stop after defined time, in seconds\n" \
424 "\t-a Only trace specified actions. See documentation\n" \
425 "\t-A Give trace mask as a single value. See documentation\n" \
426 "\t-b Sub buffer size in KiB\n" \
427 "\t-n Number of sub buffers\n" \
428 "\t-l Run in network listen mode (blktrace server)\n" \
429 "\t-h Run in network client mode, connecting to the given host\n" \
430 "\t-p Network port to use (default 8462)\n" \
431 "\t-s Make the network client NOT use sendfile() to transfer data\n" \
432 "\t-I Add devices found in <devs file>\n" \
433 "\t-V Print program version info\n\n";
435 static void clear_events(struct pollfd *pfd)
441 static inline int net_client_use_sendfile(void)
443 return net_mode == Net_client && net_use_sendfile;
446 static inline int net_client_use_send(void)
448 return net_mode == Net_client && !net_use_sendfile;
451 static inline int use_tracer_devpaths(void)
453 return piped_output || net_client_use_send();
456 static inline int in_addr_eq(struct in_addr a, struct in_addr b)
458 return a.s_addr == b.s_addr;
461 static inline void pdc_dr_update(struct devpath *dpp, int cpu, int data_read)
463 dpp->stats[cpu].data_read += data_read;
466 static inline void pdc_nev_update(struct devpath *dpp, int cpu, int nevents)
468 dpp->stats[cpu].nevents += nevents;
471 static void show_usage(char *prog)
473 fprintf(stderr, "Usage: %s %s %s", prog, blktrace_version, usage_str);
476 static void init_mmap_info(struct mmap_info *mip)
478 mip->buf_size = buf_size;
479 mip->buf_nr = buf_nr;
480 mip->pagesize = pagesize;
483 static void net_close_connection(int *fd)
485 shutdown(*fd, SHUT_RDWR);
490 static void dpp_free(struct devpath *dpp)
499 free(dpp->buts_name);
503 static int lock_on_cpu(int cpu)
508 CPU_SET(cpu, &cpu_mask);
509 if (sched_setaffinity(getpid(), sizeof(cpu_mask), &cpu_mask) < 0)
516 * Create a timespec 'delta_msec' milliseconds into the future
518 static inline void make_timespec(struct timespec *tsp, long delta_msec)
522 gettimeofday(&now, NULL);
523 tsp->tv_sec = now.tv_sec;
524 tsp->tv_nsec = 1000L * now.tv_usec;
526 tsp->tv_nsec += (delta_msec * 1000000L);
527 if (tsp->tv_nsec > 1000000000L) {
528 long secs = tsp->tv_nsec / 1000000000L;
531 tsp->tv_nsec -= (secs * 1000000000L);
535 static int increase_limit(int resource, rlim_t increase)
538 int save_errno = errno;
540 if (!getrlimit(resource, &rlim)) {
541 rlim.rlim_cur += increase;
542 if (rlim.rlim_cur >= rlim.rlim_max)
543 rlim.rlim_max = rlim.rlim_cur + increase;
545 if (!setrlimit(resource, &rlim))
553 static int handle_open_failure(void)
555 if (errno == ENFILE || errno == EMFILE)
556 return increase_limit(RLIMIT_NOFILE, 16);
560 static int handle_mem_failure(size_t length)
563 return handle_open_failure();
564 else if (errno == ENOMEM)
565 return increase_limit(RLIMIT_MEMLOCK, 2 * length);
569 static FILE *my_fopen(const char *path, const char *mode)
574 fp = fopen(path, mode);
575 } while (fp == NULL && handle_open_failure());
580 static int my_open(const char *path, int flags)
585 fd = open(path, flags);
586 } while (fd < 0 && handle_open_failure());
591 static int my_socket(int domain, int type, int protocol)
596 fd = socket(domain, type, protocol);
597 } while (fd < 0 && handle_open_failure());
602 static int my_accept(int sockfd, struct sockaddr *addr, socklen_t *addrlen)
607 fd = accept(sockfd, addr, addrlen);
608 } while (fd < 0 && handle_open_failure());
613 static void *my_mmap(void *addr, size_t length, int prot, int flags, int fd,
619 new = mmap(addr, length, prot, flags, fd, offset);
620 } while (new == MAP_FAILED && handle_mem_failure(length));
625 static int my_mlock(const void *addr, size_t len)
630 ret = mlock(addr, len);
631 } while (ret < 0 && handle_mem_failure(len));
636 static int __stop_trace(int fd)
639 * Should be stopped, don't complain if it isn't
641 ioctl(fd, BLKTRACESTOP);
642 return ioctl(fd, BLKTRACETEARDOWN);
645 static int write_data(char *buf, int len)
650 ret = fwrite(buf, len, 1, pfp);
651 if (ferror(pfp) || ret != 1) {
652 if (errno == EINTR) {
657 if (!piped_output || (errno != EPIPE && errno != EBADF)) {
658 fprintf(stderr, "write(%d) failed: %d/%s\n",
659 len, errno, strerror(errno));
673 * Returns the number of bytes read (successfully)
675 static int __net_recv_data(int fd, void *buf, unsigned int len)
677 unsigned int bytes_left = len;
679 while (bytes_left && !done) {
680 int ret = recv(fd, buf, bytes_left, MSG_WAITALL);
685 if (errno != EAGAIN) {
686 perror("server: net_recv_data: recv failed");
696 return len - bytes_left;
699 static int net_recv_data(int fd, void *buf, unsigned int len)
701 return __net_recv_data(fd, buf, len);
705 * Returns number of bytes written
707 static int net_send_data(int fd, void *buf, unsigned int buf_len)
710 unsigned int bytes_left = buf_len;
713 ret = send(fd, buf, bytes_left, 0);
723 return buf_len - bytes_left;
726 static int net_send_header(int fd, int cpu, char *buts_name, int len)
728 struct blktrace_net_hdr hdr;
730 memset(&hdr, 0, sizeof(hdr));
732 hdr.magic = BLK_IO_TRACE_MAGIC;
733 strncpy(hdr.buts_name, buts_name, sizeof(hdr.buts_name));
734 hdr.buts_name[sizeof(hdr.buts_name)-1] = '\0';
736 hdr.max_cpus = ncpus;
738 hdr.cl_id = getpid();
739 hdr.buf_size = buf_size;
741 hdr.page_size = pagesize;
743 return net_send_data(fd, &hdr, sizeof(hdr)) != sizeof(hdr);
746 static void net_send_open_close(int fd, int cpu, char *buts_name, int len)
748 struct blktrace_net_hdr ret_hdr;
750 net_send_header(fd, cpu, buts_name, len);
751 net_recv_data(fd, &ret_hdr, sizeof(ret_hdr));
754 static void net_send_open(int fd, int cpu, char *buts_name)
756 net_send_open_close(fd, cpu, buts_name, 0);
759 static void net_send_close(int fd, char *buts_name, int drops)
762 * Overload CPU w/ number of drops
764 * XXX: Need to clear/set done around call - done=1 (which
765 * is true here) stops reads from happening... :-(
768 net_send_open_close(fd, drops, buts_name, 1);
772 static void ack_open_close(int fd, char *buts_name)
774 net_send_header(fd, 0, buts_name, 2);
777 static void net_send_drops(int fd)
781 __list_for_each(p, &devpaths) {
782 struct devpath *dpp = list_entry(p, struct devpath, head);
784 net_send_close(fd, dpp->buts_name, dpp->drops);
794 static int net_get_header(struct cl_conn *nc, struct blktrace_net_hdr *bnh)
797 int fl = fcntl(nc->fd, F_GETFL);
799 fcntl(nc->fd, F_SETFL, fl | O_NONBLOCK);
800 bytes_read = __net_recv_data(nc->fd, bnh, sizeof(*bnh));
801 fcntl(nc->fd, F_SETFL, fl & ~O_NONBLOCK);
803 if (bytes_read == sizeof(*bnh))
805 else if (bytes_read == 0)
810 static int net_setup_client(void)
813 struct sockaddr_in addr;
815 memset(&addr, 0, sizeof(addr));
816 addr.sin_family = AF_INET;
817 addr.sin_port = htons(net_port);
819 if (inet_aton(hostname, &addr.sin_addr) != 1) {
820 struct hostent *hent = gethostbyname(hostname);
822 perror("gethostbyname");
826 memcpy(&addr.sin_addr, hent->h_addr, 4);
827 strcpy(hostname, hent->h_name);
830 fd = my_socket(AF_INET, SOCK_STREAM, 0);
832 perror("client: socket");
836 if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
837 if (errno == ECONNREFUSED)
839 "\nclient: Connection to %s refused, "
840 "perhaps the server is not started?\n\n",
843 perror("client: connect");
851 static int open_client_connections(void)
855 cl_fds = calloc(ncpus, sizeof(*cl_fds));
856 for (cpu = 0; cpu < ncpus; cpu++) {
857 cl_fds[cpu] = net_setup_client();
865 close(cl_fds[cpu--]);
870 static void close_client_connections(void)
875 for (cpu = 0, fdp = cl_fds; cpu < ncpus; cpu++, fdp++) {
877 net_send_drops(*fdp);
878 net_close_connection(fdp);
885 static void setup_buts(void)
889 __list_for_each(p, &devpaths) {
890 struct blk_user_trace_setup buts;
891 struct devpath *dpp = list_entry(p, struct devpath, head);
893 memset(&buts, 0, sizeof(buts));
894 buts.buf_size = buf_size;
895 buts.buf_nr = buf_nr;
896 buts.act_mask = act_mask;
898 if (ioctl(dpp->fd, BLKTRACESETUP, &buts) < 0) {
899 fprintf(stderr, "BLKTRACESETUP(2) %s failed: %d/%s\n",
900 dpp->path, errno, strerror(errno));
902 } else if (ioctl(dpp->fd, BLKTRACESTART) < 0) {
903 fprintf(stderr, "BLKTRACESTART %s failed: %d/%s\n",
904 dpp->path, errno, strerror(errno));
909 dpp->buts_name = strdup(buts.name);
912 dpp->stats = calloc(dpp->ncpus, sizeof(*dpp->stats));
913 memset(dpp->stats, 0, dpp->ncpus * sizeof(*dpp->stats));
917 static int get_drops(struct devpath *dpp)
920 char fn[MAXPATHLEN + 64], tmp[256];
922 snprintf(fn, sizeof(fn), "%s/block/%s/dropped", debugfs_path,
925 fd = my_open(fn, O_RDONLY);
928 * This may be ok: the kernel may not support
932 fprintf(stderr, "Could not open %s: %d/%s\n",
933 fn, errno, strerror(errno));
935 } else if (read(fd, tmp, sizeof(tmp)) < 0) {
936 fprintf(stderr, "Could not read %s: %d/%s\n",
937 fn, errno, strerror(errno));
945 static void get_all_drops(void)
949 __list_for_each(p, &devpaths) {
950 struct devpath *dpp = list_entry(p, struct devpath, head);
951 dpp->drops = get_drops(dpp);
955 static inline struct trace_buf *alloc_trace_buf(int cpu, int bufsize)
957 struct trace_buf *tbp;
959 tbp = malloc(sizeof(*tbp) + bufsize);
960 INIT_LIST_HEAD(&tbp->head);
962 tbp->buf = (void *)(tbp + 1);
964 tbp->dpp = NULL; /* Will be set when tbp is added */
969 static void free_tracer_heads(struct devpath *dpp)
972 struct tracer_devpath_head *hd;
974 for (cpu = 0, hd = dpp->heads; cpu < ncpus; cpu++, hd++) {
977 pthread_mutex_destroy(&hd->mutex);
982 static int setup_tracer_devpaths(void)
986 if (net_client_use_send())
987 if (open_client_connections())
990 __list_for_each(p, &devpaths) {
992 struct tracer_devpath_head *hd;
993 struct devpath *dpp = list_entry(p, struct devpath, head);
995 dpp->heads = calloc(ncpus, sizeof(struct tracer_devpath_head));
996 for (cpu = 0, hd = dpp->heads; cpu < ncpus; cpu++, hd++) {
997 INIT_LIST_HEAD(&hd->head);
998 pthread_mutex_init(&hd->mutex, NULL);
1006 static inline void add_trace_buf(struct devpath *dpp, int cpu,
1007 struct trace_buf **tbpp)
1009 struct trace_buf *tbp = *tbpp;
1010 struct tracer_devpath_head *hd = &dpp->heads[cpu];
1014 pthread_mutex_lock(&hd->mutex);
1015 list_add_tail(&tbp->head, &hd->head);
1016 pthread_mutex_unlock(&hd->mutex);
1018 *tbpp = alloc_trace_buf(cpu, buf_size);
1021 static inline void incr_entries(int entries_handled)
1023 pthread_mutex_lock(&dp_mutex);
1024 if (dp_entries == 0)
1025 pthread_cond_signal(&dp_cond);
1026 dp_entries += entries_handled;
1027 pthread_mutex_unlock(&dp_mutex);
1030 static int add_devpath(char *path)
1033 struct devpath *dpp;
1036 * Verify device is valid before going too far
1038 fd = my_open(path, O_RDONLY | O_NONBLOCK);
1040 fprintf(stderr, "Invalid path %s specified: %d/%s\n",
1041 path, errno, strerror(errno));
1045 dpp = malloc(sizeof(*dpp));
1046 memset(dpp, 0, sizeof(*dpp));
1047 dpp->path = strdup(path);
1050 list_add_tail(&dpp->head, &devpaths);
1055 static void rel_devpaths(void)
1057 struct list_head *p, *q;
1059 list_for_each_safe(p, q, &devpaths) {
1060 struct devpath *dpp = list_entry(p, struct devpath, head);
1062 list_del(&dpp->head);
1063 __stop_trace(dpp->fd);
1067 free_tracer_heads(dpp);
1074 static int flush_subbuf_net(struct trace_buf *tbp)
1076 int fd = cl_fds[tbp->cpu];
1077 struct devpath *dpp = tbp->dpp;
1079 if (net_send_header(fd, tbp->cpu, dpp->buts_name, tbp->len))
1082 if (net_send_data(fd, tbp->buf, tbp->len) != tbp->len)
1089 handle_list_net(__attribute__((__unused__))struct tracer_devpath_head *hd,
1090 struct list_head *list)
1092 struct trace_buf *tbp;
1093 struct list_head *p, *q;
1094 int entries_handled = 0;
1096 list_for_each_safe(p, q, list) {
1097 tbp = list_entry(p, struct trace_buf, head);
1099 list_del(&tbp->head);
1102 if (cl_fds[tbp->cpu] >= 0) {
1103 if (flush_subbuf_net(tbp)) {
1104 close(cl_fds[tbp->cpu]);
1105 cl_fds[tbp->cpu] = -1;
1112 return entries_handled;
1115 static int handle_list_file(struct tracer_devpath_head *hd,
1116 struct list_head *list)
1118 int off, t_len, nevents;
1119 struct blk_io_trace *t;
1120 struct list_head *p, *q;
1121 int entries_handled = 0;
1122 struct trace_buf *tbp, *prev;
1125 list_for_each_safe(p, q, list) {
1126 tbp = list_entry(p, struct trace_buf, head);
1127 list_del(&tbp->head);
1131 * If there was some leftover before, tack this new
1132 * entry onto the tail of the previous one.
1135 unsigned long tot_len;
1136 struct trace_buf *tmp = tbp;
1141 tot_len = tbp->len + tmp->len;
1142 if (tot_len > buf_size) {
1144 * tbp->head isn't connected (it was 'prev'
1145 * so it had been taken off of the list
1146 * before). Therefore, we can realloc
1147 * the whole structures, as the other fields
1150 tbp = realloc(tbp->buf, sizeof(*tbp) + tot_len);
1151 tbp->buf = (void *)(tbp + 1);
1154 memcpy(tbp->buf + tbp->len, tmp->buf, tmp->len);
1161 * See how many whole traces there are - send them
1162 * all out in one go.
1166 while (off + (int)sizeof(*t) <= tbp->len) {
1167 t = (struct blk_io_trace *)(tbp->buf + off);
1168 t_len = sizeof(*t) + t->pdu_len;
1169 if (off + t_len > tbp->len)
1176 pdc_nev_update(tbp->dpp, tbp->cpu, nevents);
1179 * Write any full set of traces, any remaining data is kept
1180 * for the next pass.
1183 if (write_data(tbp->buf, off) || off == tbp->len)
1187 * Move valid data to beginning of buffer
1190 memmove(tbp->buf, tbp->buf + off, tbp->len);
1198 return entries_handled;
1201 static void __process_trace_bufs(void)
1204 struct list_head *p;
1205 struct list_head list;
1208 __list_for_each(p, &devpaths) {
1209 struct devpath *dpp = list_entry(p, struct devpath, head);
1210 struct tracer_devpath_head *hd = dpp->heads;
1212 for (cpu = 0; cpu < ncpus; cpu++, hd++) {
1213 pthread_mutex_lock(&hd->mutex);
1214 if (list_empty(&hd->head)) {
1215 pthread_mutex_unlock(&hd->mutex);
1219 list_replace_init(&hd->head, &list);
1220 pthread_mutex_unlock(&hd->mutex);
1222 handled += handle_list(hd, &list);
1227 pthread_mutex_lock(&dp_mutex);
1228 dp_entries -= handled;
1229 pthread_mutex_unlock(&dp_mutex);
1233 static void process_trace_bufs(void)
1236 pthread_mutex_lock(&dp_mutex);
1237 while (!done && dp_entries == 0) {
1240 make_timespec(&ts, 50);
1241 pthread_cond_timedwait(&dp_cond, &dp_mutex, &ts);
1243 pthread_mutex_unlock(&dp_mutex);
1245 __process_trace_bufs();
1249 static void clean_trace_bufs(void)
1252 * No mutex needed here: we're only reading from the lists,
1256 __process_trace_bufs();
1259 static inline void read_err(int cpu, char *ifn)
1261 if (errno != EAGAIN)
1262 fprintf(stderr, "Thread %d failed read of %s: %d/%s\n",
1263 cpu, ifn, errno, strerror(errno));
1266 static int net_sendfile(struct io_info *iop)
1270 ret = sendfile(iop->ofd, iop->ifd, NULL, iop->ready);
1274 } else if (ret < (int)iop->ready) {
1275 fprintf(stderr, "short sendfile send (%d of %d)\n",
1283 static inline int net_sendfile_data(struct tracer *tp, struct io_info *iop)
1285 struct devpath *dpp = iop->dpp;
1287 if (net_send_header(iop->ofd, tp->cpu, dpp->buts_name, iop->ready))
1289 return net_sendfile(iop);
1292 static int handle_pfds_netclient(struct tracer *tp, int nevs, int force_read)
1295 int i, nentries = 0;
1296 struct pdc_stats *sp;
1297 struct pollfd *pfd = tp->pfds;
1298 struct io_info *iop = tp->ios;
1300 for (i = 0; nevs > 0 && i < ndevs; i++, pfd++, iop++, sp++) {
1301 if (pfd->revents & POLLIN || force_read) {
1302 if (fstat(iop->ifd, &sb) < 0) {
1305 } else if (sb.st_size > (off_t)iop->data_queued) {
1306 iop->ready = sb.st_size - iop->data_queued;
1307 iop->data_queued = sb.st_size;
1308 if (!net_sendfile_data(tp, iop)) {
1309 pdc_dr_update(iop->dpp, tp->cpu,
1320 incr_entries(nentries);
1325 static int handle_pfds_entries(struct tracer *tp, int nevs, int force_read)
1327 int i, nentries = 0;
1328 struct trace_buf *tbp;
1329 struct pollfd *pfd = tp->pfds;
1330 struct io_info *iop = tp->ios;
1332 tbp = alloc_trace_buf(tp->cpu, buf_size);
1333 for (i = 0; i < ndevs; i++, pfd++, iop++) {
1334 if (pfd->revents & POLLIN || force_read) {
1335 tbp->len = read(iop->ifd, tbp->buf, buf_size);
1337 pdc_dr_update(iop->dpp, tp->cpu, tbp->len);
1338 add_trace_buf(iop->dpp, tp->cpu, &tbp);
1340 } else if (tbp->len == 0) {
1342 * Short reads after we're done stop us
1343 * from trying reads.
1348 read_err(tp->cpu, iop->ifn);
1349 if (errno != EAGAIN || tp->is_done)
1352 if (!piped_output && --nevs == 0)
1359 incr_entries(nentries);
1364 static int fill_ofname(struct io_info *iop, int cpu)
1368 char *dst = iop->ofn;
1371 len = snprintf(iop->ofn, sizeof(iop->ofn), "%s/", output_dir);
1373 len = snprintf(iop->ofn, sizeof(iop->ofn), "./");
1375 if (net_mode == Net_server) {
1376 struct cl_conn *nc = iop->nc;
1378 len += sprintf(dst + len, "%s-", nc->ch->hostname);
1379 len += strftime(dst + len, 64, "%F-%T/",
1380 gmtime(&iop->dpp->cl_connect_time));
1383 if (stat(iop->ofn, &sb) < 0) {
1384 if (errno != ENOENT) {
1386 "Destination dir %s stat failed: %d/%s\n",
1387 iop->ofn, errno, strerror(errno));
1390 if (mkdir(iop->ofn, 0755) < 0) {
1392 "Destination dir %s can't be made: %d/%s\n",
1393 iop->ofn, errno, strerror(errno));
1399 snprintf(iop->ofn + len, sizeof(iop->ofn), "%s.blktrace.%d",
1402 snprintf(iop->ofn + len, sizeof(iop->ofn), "%s.blktrace.%d",
1403 iop->dpp->buts_name, cpu);
1408 static int set_vbuf(struct io_info *iop, int mode, size_t size)
1410 iop->obuf = malloc(size);
1411 if (setvbuf(iop->ofp, iop->obuf, mode, size) < 0) {
1412 fprintf(stderr, "setvbuf(%s, %d) failed: %d/%s\n",
1413 iop->dpp->path, (int)size, errno,
1422 static int iop_open(struct io_info *iop, int cpu)
1425 if (fill_ofname(iop, cpu))
1428 iop->ofp = my_fopen(iop->ofn, "w+");
1429 if (iop->ofp == NULL) {
1430 fprintf(stderr, "Open output file %s failed: %d/%s\n",
1431 iop->ofn, errno, strerror(errno));
1434 if (set_vbuf(iop, _IOLBF, FILE_VBUF_SIZE)) {
1435 fprintf(stderr, "set_vbuf for file %s failed: %d/%s\n",
1436 iop->ofn, errno, strerror(errno));
1441 iop->ofd = fileno(iop->ofp);
1445 static int open_ios(struct tracer *tp)
1448 struct io_info *iop;
1449 struct list_head *p;
1451 tp->ios = calloc(ndevs, sizeof(struct io_info));
1452 tp->pfds = calloc(ndevs, sizeof(struct pollfd));
1454 memset(tp->ios, 0, ndevs * sizeof(struct io_info));
1455 memset(tp->pfds, 0, ndevs * sizeof(struct pollfd));
1460 __list_for_each(p, &devpaths) {
1461 struct devpath *dpp = list_entry(p, struct devpath, head);
1465 snprintf(iop->ifn, sizeof(iop->ifn), "%s/block/%s/trace%d",
1466 debugfs_path, dpp->buts_name, tp->cpu);
1468 iop->ifd = my_open(iop->ifn, O_RDONLY | O_NONBLOCK);
1470 fprintf(stderr, "Thread %d failed open %s: %d/%s\n",
1471 tp->cpu, iop->ifn, errno, strerror(errno));
1475 init_mmap_info(&iop->mmap_info);
1478 pfd->events = POLLIN;
1482 else if (net_client_use_sendfile()) {
1483 iop->ofd = net_setup_client();
1486 net_send_open(iop->ofd, tp->cpu, dpp->buts_name);
1487 } else if (net_mode == Net_none) {
1488 if (iop_open(iop, tp->cpu))
1492 * This ensures that the server knows about all
1493 * connections & devices before _any_ closes
1495 net_send_open(cl_fds[tp->cpu], tp->cpu, dpp->buts_name);
1506 close(iop->ifd); /* tp->nios _not_ bumped */
1510 static void close_iop(struct io_info *iop)
1512 struct mmap_info *mip = &iop->mmap_info;
1515 munmap(mip->fs_buf, mip->fs_buf_len);
1517 if (!piped_output) {
1518 if (ftruncate(fileno(iop->ofp), mip->fs_size) < 0) {
1520 "Ignoring err: ftruncate(%s): %d/%s\n",
1521 iop->ofn, errno, strerror(errno));
1531 static void close_ios(struct tracer *tp)
1533 while (tp->nios > 0) {
1534 struct io_info *iop = &tp->ios[--tp->nios];
1536 iop->dpp->drops = get_drops(iop->dpp);
1542 else if (iop->ofd >= 0) {
1543 struct devpath *dpp = iop->dpp;
1545 net_send_close(iop->ofd, dpp->buts_name, dpp->drops);
1546 net_close_connection(&iop->ofd);
1554 static int setup_mmap(int fd, unsigned int maxlen, struct mmap_info *mip)
1556 if (mip->fs_off + maxlen > mip->fs_buf_len) {
1557 unsigned long nr = max(16, mip->buf_nr);
1560 munlock(mip->fs_buf, mip->fs_buf_len);
1561 munmap(mip->fs_buf, mip->fs_buf_len);
1565 mip->fs_off = mip->fs_size & (mip->pagesize - 1);
1566 mip->fs_buf_len = (nr * mip->buf_size) - mip->fs_off;
1567 mip->fs_max_size += mip->fs_buf_len;
1569 if (ftruncate(fd, mip->fs_max_size) < 0) {
1570 perror("__setup_mmap: ftruncate");
1574 mip->fs_buf = my_mmap(NULL, mip->fs_buf_len, PROT_WRITE,
1576 mip->fs_size - mip->fs_off);
1577 if (mip->fs_buf == MAP_FAILED) {
1578 perror("__setup_mmap: mmap");
1581 my_mlock(mip->fs_buf, mip->fs_buf_len);
1587 static int handle_pfds_file(struct tracer *tp, int nevs, int force_read)
1589 struct mmap_info *mip;
1590 int i, ret, nentries = 0;
1591 struct pollfd *pfd = tp->pfds;
1592 struct io_info *iop = tp->ios;
1594 for (i = 0; nevs > 0 && i < ndevs; i++, pfd++, iop++) {
1595 if (pfd->revents & POLLIN || force_read) {
1596 mip = &iop->mmap_info;
1598 ret = setup_mmap(iop->ofd, buf_size, mip);
1604 ret = read(iop->ifd, mip->fs_buf + mip->fs_off,
1607 pdc_dr_update(iop->dpp, tp->cpu, ret);
1608 mip->fs_size += ret;
1611 } else if (ret == 0) {
1613 * Short reads after we're done stop us
1614 * from trying reads.
1619 read_err(tp->cpu, iop->ifn);
1620 if (errno != EAGAIN || tp->is_done)
1630 static void *thread_main(void *arg)
1635 struct tracer *tp = arg;
1637 ret = lock_on_cpu(tp->cpu);
1647 pthread_mutex_lock(&tp->mutex);
1649 pthread_cond_signal(&tp->cond);
1650 pthread_mutex_unlock(&tp->mutex);
1653 to_val = 50; /* Frequent partial handles */
1655 to_val = 500; /* 1/2 second intervals */
1657 while (!tp->is_done) {
1658 ndone = poll(tp->pfds, ndevs, to_val);
1659 if (ndone || piped_output)
1660 (void)handle_pfds(tp, ndone, piped_output);
1661 else if (ndone < 0 && errno != EINTR)
1662 fprintf(stderr, "Thread %d poll failed: %d/%s\n",
1663 tp->cpu, errno, strerror(errno));
1667 * Trace is stopped, pull data until we get a short read
1669 while (handle_pfds(tp, ndevs, 1) > 0)
1675 pthread_mutex_lock(&tp->mutex);
1678 pthread_cond_signal(&tp->cond);
1679 pthread_mutex_unlock(&tp->mutex);
1683 static int start_tracer(int cpu)
1687 tp = malloc(sizeof(*tp));
1688 memset(tp, 0, sizeof(*tp));
1690 INIT_LIST_HEAD(&tp->head);
1691 pthread_mutex_init(&tp->mutex, NULL);
1692 pthread_cond_init(&tp->cond, NULL);
1697 if (pthread_create(&tp->thread, NULL, thread_main, tp)) {
1698 fprintf(stderr, "FAILED to start thread on CPU %d: %d/%s\n",
1699 cpu, errno, strerror(errno));
1703 pthread_mutex_lock(&tp->mutex);
1704 while (!tp->running && (tp->status == 0))
1705 pthread_cond_wait(&tp->cond, &tp->mutex);
1706 pthread_mutex_unlock(&tp->mutex);
1708 if (tp->status == 0) {
1709 list_add_tail(&tp->head, &tracers);
1713 fprintf(stderr, "FAILED to start thread on CPU %d\n", cpu);
1716 pthread_mutex_destroy(&tp->mutex);
1717 pthread_cond_destroy(&tp->cond);
1722 static int start_tracers(void)
1726 for (cpu = 0; cpu < ncpus; cpu++)
1727 if (start_tracer(cpu))
1733 static void stop_tracers(void)
1735 struct list_head *p;
1738 * Stop the tracing - makes the tracer threads clean up quicker.
1740 __list_for_each(p, &devpaths) {
1741 struct devpath *dpp = list_entry(p, struct devpath, head);
1742 (void)ioctl(dpp->fd, BLKTRACESTOP);
1746 * Tell each tracer to quit
1748 __list_for_each(p, &tracers) {
1749 struct tracer *tp = list_entry(p, struct tracer, head);
1754 static void del_tracers(void)
1756 struct list_head *p, *q;
1758 list_for_each_safe(p, q, &tracers) {
1759 struct tracer *tp = list_entry(p, struct tracer, head);
1761 list_del(&tp->head);
/*
 * Wait for all tracer threads and join them.
 *
 * When use_tracer_devpaths() is true, process_trace_bufs() runs first.  Then,
 * for each entry of the tracers list, the caller takes tp->mutex, waits on
 * tp->cond, releases the mutex and joins tp->thread.  A failed join is
 * reported on stderr.
 */
1767 static void wait_tracers(void)
1769 struct list_head *p;
1771 if (use_tracer_devpaths())
1772 process_trace_bufs();
1774 __list_for_each(p, &tracers) {
1776 struct tracer *tp = list_entry(p, struct tracer, head);
1778 pthread_mutex_lock(&tp->mutex);
/*
 * NOTE(review): the predicate guarding this wait is not visible here; confirm
 * the wait is re-checked in a loop so a spurious wakeup is harmless.
 */
1780 pthread_cond_wait(&tp->cond, &tp->mutex);
1781 pthread_mutex_unlock(&tp->mutex);
1783 ret = pthread_join(tp->thread, NULL);
1785 fprintf(stderr, "Thread join %d failed %d\n",
1789 if (use_tracer_devpaths())
/*
 * Exit-time hook (main() registers it with atexit()).
 *
 * Sets SIGINT, SIGHUP, SIGTERM and SIGALRM to SIG_IGN; these are the same four
 * signals main() routes to handle_sigint(), so they no longer reach that
 * handler once exit processing has started.
 */
1795 static void exit_tracing(void)
1797 signal(SIGINT, SIG_IGN);
1798 signal(SIGHUP, SIG_IGN);
1799 signal(SIGTERM, SIG_IGN);
1800 signal(SIGALRM, SIG_IGN);
/*
 * Signal handler that main() installs for SIGINT, SIGHUP, SIGTERM and
 * SIGALRM.  The signal number is deliberately unused, so all four signals are
 * treated alike.
 */
1808 static void handle_sigint(__attribute__((__unused__)) int sig)
/*
 * Print end-of-run statistics for every device on a list.
 *
 * For each struct devpath (linked through its head member) a per-CPU line and
 * a per-device total are written to ofp.  In server mode an "end of run" line
 * also goes to stdout.  Drop counts are accumulated over all devices and
 * reported on stderr together with advice on buffer sizing.
 *
 * devpaths: list of struct devpath entries to report on.
 */
1814 static void show_stats(struct list_head *devpaths)
1817 struct list_head *p;
1818 unsigned long long nevents, data_read;
1819 unsigned long long total_drops = 0;
1820 unsigned long long total_events = 0;
/*
 * NOTE(review): no matching close of this /dev/null stream is visible in this
 * function; confirm it is not leaked on each call.
 */
1823 ofp = my_fopen("/dev/null", "w");
1827 __list_for_each(p, devpaths) {
1829 struct pdc_stats *sp;
1830 struct devpath *dpp = list_entry(p, struct devpath, head);
1832 if (net_mode == Net_server)
1833 printf("server: end of run for %s:%s\n",
1834 dpp->ch->hostname, dpp->buts_name);
1839 fprintf(ofp, "=== %s ===\n", dpp->buts_name);
/* One pdc_stats slot per CPU; sp walks the array in step with cpu. */
1840 for (cpu = 0, sp = dpp->stats; cpu < dpp->ncpus; cpu++, sp++) {
1842 * Estimate events if not known...
/*
 * The estimate assumes every event is exactly one struct blk_io_trace and is
 * stored back into sp->nevents, so later readers see the estimated value.
 */
1844 if (sp->nevents == 0) {
1845 sp->nevents = sp->data_read /
1846 sizeof(struct blk_io_trace);
1850 " CPU%3d: %20llu events, %8llu KiB data\n",
1851 cpu, sp->nevents, (sp->data_read + 1023) >> 10);
1853 data_read += sp->data_read;
1854 nevents += sp->nevents;
/*
 * NOTE(review): the per-CPU line rounds bytes up to KiB with "+ 1023", the
 * total below uses "+ 1024"; the total is therefore one KiB too high whenever
 * data_read is an exact multiple of 1024.  Confirm which rounding is intended.
 */
1857 fprintf(ofp, " Total: %20llu events (dropped %llu),"
1858 " %8llu KiB data\n", nevents,
1859 dpp->drops, (data_read + 1024) >> 10);
/* Dropped events count towards the grand total used for the drop ratio. */
1861 total_drops += dpp->drops;
1862 total_events += (nevents + dpp->drops);
/* drops_ratio stays at 1.0 (100%) unless the division below is executed. */
1870 double drops_ratio = 1.0;
1873 drops_ratio = (double)total_drops/(double)total_events;
1875 fprintf(stderr, "\nYou have %llu (%5.1lf%%) dropped events\n"
1876 "Consider using a larger buffer size (-b) "
1877 "and/or more buffers (-n)\n",
1878 total_drops, 100.0 * drops_ratio);
/*
 * Parse the command line and derive the run configuration.
 *
 * Options are consumed with getopt_long(S_OPTS, l_opts); remaining arguments
 * are added as device paths.  Afterwards the function checks that a device
 * was given (unless running as network server), validates debugfs_path with
 * statfs(), applies the accumulated action mask and selects the handle_pfds
 * implementation matching the output mode.
 *
 * argc, argv: as passed to main().
 * NOTE(review): the return statements are not visible here; main() tests the
 * result for non-zero.
 */
1882 static int handle_args(int argc, char *argv[])
1886 int act_mask_tmp = 0;
1888 while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) >= 0) {
/* Action mask given by name. */
1891 i = find_mask_map(optarg);
1893 fprintf(stderr, "Invalid action mask %s\n",
/* Action mask given as a hexadecimal number. */
1901 if ((sscanf(optarg, "%x", &i) != 1) ||
1902 !valid_act_opt(i)) {
1904 "Invalid set action mask %s/0x%x\n",
1912 if (add_devpath(optarg) != 0)
/* Device list read from a file, one whitespace-delimited name per entry. */
1918 FILE *ifp = my_fopen(optarg, "r");
1922 "Invalid file for devices %s\n",
/*
 * NOTE(review): "%s" has no field width, so a long token in the file can
 * overrun dev_line (its size is not visible here).
 */
1927 while (fscanf(ifp, "%s\n", dev_line) == 1)
1928 if (add_devpath(dev_line) != 0)
1934 debugfs_path = optarg;
1938 output_name = optarg;
1941 kill_running_trace = 1;
/* atoi() cannot signal a parse error; non-numeric input yields 0 and is rejected below. */
1944 stop_watch = atoi(optarg);
1945 if (stop_watch <= 0) {
1947 "Invalid stopwatch value (%d secs)\n",
1954 printf("%s version %s\n", argv[0], blktrace_version);
/*
 * NOTE(review): strtoul() is called without an end pointer, so trailing
 * garbage in the argument is silently accepted.
 */
1958 buf_size = strtoul(optarg, NULL, 10);
1959 if (buf_size <= 0 || buf_size > 16*1024) {
1960 fprintf(stderr, "Invalid buffer size (%lu)\n",
1967 buf_nr = strtoul(optarg, NULL, 10);
1970 "Invalid buffer nr (%lu)\n", buf_nr);
1975 output_dir = optarg;
1978 net_mode = Net_client;
/*
 * NOTE(review): unbounded copy of a user-supplied string; the size of
 * hostname is not visible here, so confirm optarg cannot overflow it.
 */
1979 strcpy(hostname, optarg);
1982 net_mode = Net_server;
/* NOTE(review): no range validation of the port is visible here. */
1985 net_port = atoi(optarg);
1988 net_use_sendfile = 0;
1991 show_usage(argv[0]);
/* Every remaining non-option argument is treated as a device path. */
1997 while (optind < argc)
1998 if (add_devpath(argv[optind++]) != 0)
/* A device is mandatory except when acting as network server. */
2001 if (net_mode != Net_server && ndevs == 0) {
2002 show_usage(argv[0]);
/*
 * NOTE(review): when statfs() succeeds but f_type is not DEBUGFS_TYPE, errno
 * was not set by this call, so the errno text in the message is stale.
 */
2006 if (statfs(debugfs_path, &st) < 0 || st.f_type != (long)DEBUGFS_TYPE) {
2007 fprintf(stderr, "Invalid debug path %s: %d/%s\n",
2008 debugfs_path, errno, strerror(errno));
/* A mask collected during option parsing replaces act_mask only if non-zero. */
2012 if (act_mask_tmp != 0)
2013 act_mask = act_mask_tmp;
2016 * Set up for appropriate PFD handler based upon output name.
2018 if (net_client_use_sendfile())
2019 handle_pfds = handle_pfds_netclient;
2020 else if (net_client_use_send())
2021 handle_pfds = handle_pfds_entries;
/* An output name of "-" selects the entries handler with an unbuffered pfp stream. */
2022 else if (output_name && (strcmp(output_name, "-") == 0)) {
2024 handle_pfds = handle_pfds_entries;
2026 setvbuf(pfp, NULL, _IONBF, 0);
2028 handle_pfds = handle_pfds_file;
/*
 * Record a newly accepted connection for client host ch on server ns.
 *
 * A zeroed struct cl_conn is allocated, stamped with the current time and
 * linked onto both the host's connection list (via ch_head) and the server's
 * connection list (via ns_head).  The server's pollfd array is then resized
 * to ns->connects + 1 entries.
 *
 * NOTE(review): the malloc() result is passed to memset() without a NULL
 * check.  The realloc() result is stored straight into ns->pfds, so on
 * failure the old array is no longer reachable and ns->pfds becomes NULL.
 */
2032 static void ch_add_connection(struct net_server_s *ns, struct cl_host *ch,
2037 nc = malloc(sizeof(*nc));
2038 memset(nc, 0, sizeof(*nc));
2040 time(&nc->connect_time);
2045 list_add_tail(&nc->ch_head, &ch->conn_list);
2048 list_add_tail(&nc->ns_head, &ns->conn_list);
2050 ns->pfds = realloc(ns->pfds, (ns->connects+1) * sizeof(struct pollfd));
/*
 * Tear down one client connection: the reverse of ch_add_connection().
 *
 * The connection's fd is closed through net_close_connection(), the
 * connection is unlinked from the host's list (ch_head) and from the server's
 * list (ns_head), and the server's pollfd array is resized to
 * ns->connects + 1 entries.
 *
 * NOTE(review): the realloc() result is stored straight into ns->pfds, so on
 * failure the old array is no longer reachable and ns->pfds becomes NULL.
 */
2053 static void ch_rem_connection(struct net_server_s *ns, struct cl_host *ch,
2056 net_close_connection(&nc->fd);
2058 list_del(&nc->ch_head);
2061 list_del(&nc->ns_head);
2063 ns->pfds = realloc(ns->pfds, (ns->connects+1) * sizeof(struct pollfd));
/*
 * Look for an already-known client host by IPv4 address.
 *
 * Walks ns->ch_list and compares each host's cl_in_addr with cl_in_addr using
 * in_addr_eq().  The search is linear in the number of known hosts.
 *
 * NOTE(review): the values returned on a hit and on a miss are not visible
 * here.
 */
2068 static struct cl_host *net_find_client_host(struct net_server_s *ns,
2069 struct in_addr cl_in_addr)
2071 struct list_head *p;
2073 __list_for_each(p, &ns->ch_list) {
2074 struct cl_host *ch = list_entry(p, struct cl_host, head);
2076 if (in_addr_eq(ch->cl_in_addr, cl_in_addr))
/*
 * Create a record for a client host seen for the first time.
 *
 * The new struct cl_host is zeroed, tagged with the peer's IPv4 address and
 * appended to ns->ch_list.  Its hostname is a heap copy of the dotted-quad
 * text from inet_ntoa() (copied because inet_ntoa() returns a static buffer).
 * Its connection and device lists start out empty.
 *
 * NOTE(review): neither the malloc() nor the strdup() result is checked for
 * NULL in the lines visible here.
 */
2083 static struct cl_host *net_add_client_host(struct net_server_s *ns,
2084 struct sockaddr_in *addr)
2088 ch = malloc(sizeof(*ch));
2089 memset(ch, 0, sizeof(*ch));
2092 ch->cl_in_addr = addr->sin_addr;
2093 list_add_tail(&ch->head, &ns->ch_list);
2096 ch->hostname = strdup(inet_ntoa(addr->sin_addr));
2097 printf("server: connection from %s\n", ch->hostname);
2099 INIT_LIST_HEAD(&ch->conn_list);
2100 INIT_LIST_HEAD(&ch->devpaths);
/*
 * Finish with one device on the server side.
 *
 * Visits each of the ncpus io_info slots in dpp->ios (the per-slot action is
 * not visible here), then unlinks dpp from the list it is on.
 *
 * dpp:   device being retired.
 * ncpus: number of io_info slots to visit.
 */
2105 static void device_done(struct devpath *dpp, int ncpus)
2108 struct io_info *iop;
2110 for (cpu = 0, iop = dpp->ios; cpu < ncpus; cpu++, iop++)
2113 list_del(&dpp->head);
/*
 * Remove a client host and everything hanging off it.
 *
 * Every device on ch->devpaths is passed to device_done() and every
 * connection on ch->conn_list to ch_rem_connection(); finally the host itself
 * is unlinked from the list it is on.  Both walks use list_for_each_safe()
 * because the callee unlinks the entry currently being visited.
 *
 * ch:    host to remove.
 * ncpus: CPU count forwarded to device_done().
 */
2117 static void net_ch_remove(struct cl_host *ch, int ncpus)
2119 struct list_head *p, *q;
2120 struct net_server_s *ns = ch->ns;
2122 list_for_each_safe(p, q, &ch->devpaths) {
2123 struct devpath *dpp = list_entry(p, struct devpath, head);
2124 device_done(dpp, ncpus);
2127 list_for_each_safe(p, q, &ch->conn_list) {
2128 struct cl_conn *nc = list_entry(p, struct cl_conn, ch_head);
2130 ch_rem_connection(ns, ch, nc);
2133 list_del(&ch->head);
/*
 * Accept one pending connection on the listening socket.
 *
 * The peer address is written into ns->addr by my_accept(); note that this is
 * the same structure net_server() filled in for bind().  The peer's host
 * record is looked up with net_find_client_host(); net_add_client_host() is
 * the fallback, under a condition not visible here.  The new fd is then
 * attached with ch_add_connection().
 */
2141 static void net_add_connection(struct net_server_s *ns)
2145 socklen_t socklen = sizeof(ns->addr);
2147 fd = my_accept(ns->listen_fd, (struct sockaddr *)&ns->addr, &socklen);
2150 * This is OK: we just won't accept this connection,
2155 ch = net_find_client_host(ns, ns->addr.sin_addr);
2157 ch = net_add_client_host(ns, &ns->addr);
2159 ch_add_connection(ns, ch, fd);
/*
 * Create the server-side device record for a device announced by a client.
 *
 * The new struct devpath takes its name and path from bnh->buts_name, its
 * client id from bnh->cl_id and its CPU count from nc->ncpus.  It is appended
 * to the client host's devpaths list.  One pdc_stats slot and one io_info
 * slot are allocated per CPU, and each io_info is opened with iop_open().
 * The trailing lines close io_info slots on the error path.
 *
 * nc:           connection the announcement arrived on.
 * bnh:          network header describing the device.
 * connect_time: timestamp stored in dpp->cl_connect_time.
 *
 * NOTE(review): the malloc(), strdup() and calloc() results are not checked
 * for NULL in the lines visible here.
 */
2163 static struct devpath *nc_add_dpp(struct cl_conn *nc,
2164 struct blktrace_net_hdr *bnh,
2165 time_t connect_time)
2168 struct io_info *iop;
2169 struct devpath *dpp;
2171 dpp = malloc(sizeof(*dpp));
2172 memset(dpp, 0, sizeof(*dpp));
2174 dpp->buts_name = strdup(bnh->buts_name);
2175 dpp->path = strdup(bnh->buts_name);
2178 dpp->cl_id = bnh->cl_id;
2179 dpp->cl_connect_time = connect_time;
2180 dpp->ncpus = nc->ncpus;
/* calloc() already returns zeroed memory; the memset() after it is redundant. */
2181 dpp->stats = calloc(dpp->ncpus, sizeof(*dpp->stats));
2182 memset(dpp->stats, 0, dpp->ncpus * sizeof(*dpp->stats));
2184 list_add_tail(&dpp->head, &nc->ch->devpaths);
/*
 * NOTE(review): the array is allocated with nc->ncpus elements but the
 * memset() is sized with the global ndevs.  If ndevs exceeds nc->ncpus this
 * writes past the allocation; the memset() is redundant after calloc() anyway.
 */
2187 dpp->ios = calloc(nc->ncpus, sizeof(*iop));
2188 memset(dpp->ios, 0, ndevs * sizeof(*iop));
2190 for (cpu = 0, iop = dpp->ios; cpu < nc->ncpus; cpu++, iop++) {
2193 init_mmap_info(&iop->mmap_info);
2195 if (iop_open(iop, cpu))
2203 * Need to unravel what's been done...
/* Error path: close io_info slots walking cpu downwards. */
2206 close_iop(&dpp->ios[cpu--]);
/*
 * Find the device record matching a network header, creating it if needed.
 *
 * The client host's devpaths list is searched for an entry whose buts_name
 * equals bnh->buts_name.  While scanning, an entry with the same cl_id
 * donates its cl_connect_time, so a newly created device inherits the connect
 * time of its siblings from the same client run.  The final statement creates
 * a new record through nc_add_dpp() with that timestamp.
 *
 * NOTE(review): the action taken on a name match is not visible here.
 */
2212 static struct devpath *nc_find_dpp(struct cl_conn *nc,
2213 struct blktrace_net_hdr *bnh)
2215 struct list_head *p;
2216 time_t connect_time = nc->connect_time;
2218 __list_for_each(p, &nc->ch->devpaths) {
2219 struct devpath *dpp = list_entry(p, struct devpath, head);
2221 if (!strcmp(dpp->buts_name, bnh->buts_name))
2224 if (dpp->cl_id == bnh->cl_id)
2225 connect_time = dpp->cl_connect_time;
2228 return nc_add_dpp(nc, bnh, connect_time);
/*
 * Receive one trace-data payload from a client into the per-CPU output file.
 *
 * The io_info for bnh->cpu is selected, setup_mmap() makes room for bnh->len
 * bytes in its mapping, and the payload is received directly into the mapped
 * buffer at offset fs_off.  The received byte count is added to the device's
 * data-read statistics and to the mapping's fs_size.
 *
 * NOTE(review): bnh->cpu comes from the network and indexes dpp->ios with no
 * bounds check visible in this function; confirm it is validated against the
 * device's CPU count before this point.
 */
2231 static void net_client_read_data(struct cl_conn *nc, struct devpath *dpp,
2232 struct blktrace_net_hdr *bnh)
2235 struct io_info *iop = &dpp->ios[bnh->cpu];
2236 struct mmap_info *mip = &iop->mmap_info;
2238 if (setup_mmap(iop->ofd, bnh->len, &iop->mmap_info)) {
2239 fprintf(stderr, "ncd(%s:%d): mmap failed\n",
2240 nc->ch->hostname, nc->fd);
2244 ret = net_recv_data(nc->fd, mip->fs_buf + mip->fs_off, bnh->len);
2246 pdc_dr_update(dpp, bnh->cpu, ret);
2247 mip->fs_size += ret;
2254 * Returns 1 if we closed a host - invalidates other polling information
2255 * that may be present.
/*
 * Process one message arriving on a client connection.
 *
 * Steps visible here: read the header with net_get_header(); determine the
 * sender's byte order on first use and byte-swap the header fields when the
 * data is not native; verify the magic number; latch the client's CPU count
 * on first sight; look up (or create) the device record; then act on bnh.len
 * as described by the in-body comments.  When the last open device of a host
 * reports end-of-run, the host's statistics are printed and the host is
 * removed.
 */
2257 static int net_client_data(struct cl_conn *nc)
2260 struct devpath *dpp;
2261 struct blktrace_net_hdr bnh;
2263 ret = net_get_header(nc, &bnh);
2268 fprintf(stderr, "ncd(%d): header read failed\n", nc->fd);
/* data_is_native == -1 means the byte order has not been determined yet. */
2272 if (data_is_native == -1 && check_data_endianness(bnh.magic)) {
2273 fprintf(stderr, "ncd(%d): received data is bad\n", nc->fd);
/* Non-native sender: convert every numeric header field from big-endian. */
2277 if (!data_is_native) {
2278 bnh.magic = be32_to_cpu(bnh.magic);
2279 bnh.cpu = be32_to_cpu(bnh.cpu);
2280 bnh.max_cpus = be32_to_cpu(bnh.max_cpus);
2281 bnh.len = be32_to_cpu(bnh.len);
2282 bnh.cl_id = be32_to_cpu(bnh.cl_id);
2283 bnh.buf_size = be32_to_cpu(bnh.buf_size);
2284 bnh.buf_nr = be32_to_cpu(bnh.buf_nr);
2285 bnh.page_size = be32_to_cpu(bnh.page_size);
/* Only the upper 24 bits are compared, so the low byte of the magic is ignored. */
2288 if ((bnh.magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) {
2289 fprintf(stderr, "ncd(%s:%d): bad data magic\n",
2290 nc->ch->hostname, nc->fd);
/* -1 marks "CPU count not yet known" for this connection. */
2294 if (nc->ncpus == -1)
2295 nc->ncpus = bnh.max_cpus;
2298 * len == 0 means the other end is sending us a new connection/dpp
2299 * len == 1 means that the other end signalled end-of-run
/*
 * NOTE(review): no NULL check of dpp is visible before it is dereferenced
 * below; confirm nc_find_dpp() failure is handled.
 */
2301 dpp = nc_find_dpp(nc, &bnh);
2304 * Just adding in the dpp above is enough
2306 ack_open_close(nc->fd, dpp->buts_name);
2308 } else if (bnh.len == 1) {
2310 * overload cpu count with dropped events
2312 dpp->drops = bnh.cpu;
2314 ack_open_close(nc->fd, dpp->buts_name);
/* Last open device of this host has finished: report and drop the host. */
2315 if (--nc->ch->cl_opens == 0) {
2316 show_stats(&nc->ch->devpaths);
2317 net_ch_remove(nc->ch, nc->ncpus);
2321 net_client_read_data(nc, dpp, &bnh);
/*
 * Service every client connection that poll() flagged as readable.
 *
 * Walks ns->conn_list; for an entry whose pollfd has POLLIN set, the
 * connection is handed to net_client_data().  The loop condition on that call
 * is satisfied when it returns non-zero or when the last of the `events`
 * ready descriptors has been handled.
 *
 * ns:     server state.
 * events: number of ready descriptors still to be handled.
 */
2326 static void handle_client_data(struct net_server_s *ns, int events)
2330 struct list_head *p, *q;
2333 list_for_each_safe(p, q, &ns->conn_list) {
2334 if (pfd->revents & POLLIN) {
2335 nc = list_entry(p, struct cl_conn, ns_head);
2337 if (net_client_data(nc) || --events == 0)
/*
 * Populate the server's pollfd array for the next poll() call.
 *
 * Slot 0 always watches the listening socket for POLLIN (a pending
 * connection).  The loop then walks ns->conn_list and requests POLLIN on a
 * pollfd for each client connection.
 */
2344 static void net_setup_pfds(struct net_server_s *ns)
2347 struct list_head *p;
2349 ns->pfds[0].fd = ns->listen_fd;
2350 ns->pfds[0].events = POLLIN;
2353 __list_for_each(p, &ns->conn_list) {
2354 struct cl_conn *nc = list_entry(p, struct cl_conn, ns_head);
2357 pfd->events = POLLIN;
/*
 * Main loop of the network server.
 *
 * Blocks in poll() with no timeout on ns->connects + 1 descriptors (the
 * listening socket plus one per client).  A poll() failure with errno other
 * than EINTR is reported as fatal; EINTR is not.  A readable listening
 * socket leads to net_add_connection(); client traffic is dispatched through
 * handle_client_data().
 */
2362 static int net_server_handle_connections(struct net_server_s *ns)
2366 printf("server: waiting for connections...\n");
2370 events = poll(ns->pfds, ns->connects + 1, -1);
2372 if (errno != EINTR) {
2373 perror("FATAL: poll error");
2376 } else if (events > 0) {
2377 if (ns->pfds[0].revents & POLLIN) {
2378 net_add_connection(ns);
2383 handle_client_data(ns, events);
/*
 * Run blktrace as a network server.
 *
 * Initialises the server state (empty host and connection lists, a one-entry
 * pollfd array), creates a TCP/IPv4 socket with SO_REUSEADDR, binds it to
 * INADDR_ANY on net_port, listens with a backlog of 1 and then hands control
 * to net_server_handle_connections().
 *
 * NOTE(review): the local variable below has the same name as this function
 * and hides it for the rest of the body.  The malloc() result for ns->pfds is
 * not checked in the lines visible here.
 */
2390 static int net_server(void)
2394 struct net_server_s net_server;
2395 struct net_server_s *ns = &net_server;
2397 memset(ns, 0, sizeof(*ns));
2398 INIT_LIST_HEAD(&ns->ch_list);
2399 INIT_LIST_HEAD(&ns->conn_list);
2400 ns->pfds = malloc(sizeof(struct pollfd));
2402 fd = my_socket(AF_INET, SOCK_STREAM, 0);
2404 perror("server: socket");
/* SO_REUSEADDR is set before bind(). */
2409 if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) < 0) {
2410 perror("setsockopt");
2414 memset(&ns->addr, 0, sizeof(ns->addr));
2415 ns->addr.sin_family = AF_INET;
2416 ns->addr.sin_addr.s_addr = htonl(INADDR_ANY);
2417 ns->addr.sin_port = htons(net_port);
2419 if (bind(fd, (struct sockaddr *) &ns->addr, sizeof(ns->addr)) < 0) {
2424 if (listen(fd, 1) < 0) {
2430 * The actual server looping is done here:
2433 ret = net_server_handle_connections(ns);
2436 * Clean up and return...
2443 int main(int argc, char *argv[])
2447 setlocale(LC_NUMERIC, "en_US");
2448 pagesize = getpagesize();
2449 ncpus = sysconf(_SC_NPROCESSORS_ONLN);
2451 fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed %d/%s\n",
2452 errno, strerror(errno));
2457 if (handle_args(argc, argv)) {
2462 signal(SIGINT, handle_sigint);
2463 signal(SIGHUP, handle_sigint);
2464 signal(SIGTERM, handle_sigint);
2465 signal(SIGALRM, handle_sigint);
2466 signal(SIGPIPE, SIG_IGN);
2468 if (kill_running_trace) {
2469 struct devpath *dpp;
2470 struct list_head *p;
2472 __list_for_each(p, &devpaths) {
2473 dpp = list_entry(p, struct devpath, head);
2474 if (__stop_trace(dpp->fd)) {
2476 "BLKTRACETEARDOWN %s failed: %d/%s\n",
2477 dpp->path, errno, strerror(errno));
2480 } else if (net_mode == Net_server) {
2482 fprintf(stderr, "-o ignored in server mode\n");
2488 atexit(exit_tracing);
2490 if (net_mode == Net_client)
2491 printf("blktrace: connecting to %s\n", hostname);
2495 if (use_tracer_devpaths()) {
2496 if (setup_tracer_devpaths())
2500 handle_list = handle_list_file;
2502 handle_list = handle_list_net;
2505 ntracers = start_tracers();
2506 if (ntracers != ncpus)
2509 if (net_mode == Net_client)
2510 printf("blktrace: connected!\n");
2516 if (ntracers == ncpus)
2517 show_stats(&devpaths);
2519 if (net_client_use_send())
2520 close_client_connections();