2 * block queue tracing application
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5 * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
7 * Rewrite to have a single thread per CPU (managing all devices on that CPU)
8 * Alan D. Brunelle <alan.brunelle@hp.com> - January 2009
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
39 #include <sys/ioctl.h>
40 #include <sys/types.h>
44 #include <sys/param.h>
46 #include <sys/resource.h>
47 #include <sys/socket.h>
48 #include <netinet/in.h>
49 #include <arpa/inet.h>
51 #include <sys/sendfile.h>
57 * You may want to increase this even more, if you are logging at a high
58 * rate and see skipped/missed events
60 #define BUF_SIZE (512 * 1024)
63 #define FILE_VBUF_SIZE (128 * 1024)
65 #define DEBUGFS_TYPE (0x64626720)
66 #define TRACE_NET_PORT (8462)
81 * Generic stats collected: nevents can be _roughly_ estimated by data_read
82 * (discounting pdu...)
84 * These fields are updated w/ pdc_dr_update & pdc_nev_update below.
87 unsigned long long data_read;
88 unsigned long long nevents;
92 struct list_head head;
93 char *path; /* path to device special file */
94 char *buts_name; /* name returned from bt kernel code */
95 struct pdc_stats *stats;
97 unsigned long long drops;
100 * For piped output only:
102 * Each tracer will have a tracer_devpath_head that it will add new
103 * data onto. It's list is protected above (tracer_devpath_head.mutex)
104 * and it will signal the processing thread using the dp_cond,
105 * dp_mutex & dp_entries variables above.
107 struct tracer_devpath_head *heads;
110 * For network server mode only:
114 time_t cl_connect_time;
119 * For piped output to stdout we will have each tracer thread (one per dev)
120 * tack buffers read from the relay queues on a per-device list.
122 * The main thread will then collect trace buffers from each of lists in turn.
124 * We will use a mutex to guard each of the trace_buf list. The tracers
125 * can then signal the main thread using <dp_cond,dp_mutex> and
126 * dp_entries. (When dp_entries is 0, and a tracer adds an entry it will
127 * signal. When dp_entries is 0, the main thread will wait for that condition
130 * adb: It may be better just to have a large buffer per tracer per dev,
131 * and then use it as a ring-buffer. This would certainly cut down a lot
132 * of malloc/free thrashing, at the cost of more memory movements (potentially).
135 struct list_head head;
141 struct tracer_devpath_head {
142 pthread_mutex_t mutex;
143 struct list_head head;
144 struct trace_buf *prev;
148 * Used to handle the mmap() interfaces for output file (containing traces)
152 unsigned long long fs_size, fs_max_size, fs_off, fs_buf_len;
153 unsigned long buf_size, buf_nr;
158 * Each thread doing work on a (client) side of blktrace will have one
159 * of these. The ios array contains input/output information, pfds holds
160 * poll() data. The volatile's provide flags to/from the main executing
164 struct list_head head;
169 volatile int status, is_done;
173 * networking stuff follows. we include a magic number so we know whether
174 * to endianness convert or not.
176 * The len field is overloaded:
177 * 0 - Indicates an "open" - allowing the server to set up for a dev/cpu
178 * 1 - Indicates a "close" - Shut down connection orderly
180 * The cpu field is overloaded on close: it will contain the number of drops.
182 struct blktrace_net_hdr {
183 u32 magic; /* same as trace magic */
184 char buts_name[32]; /* trace name */
185 u32 cpu; /* for which cpu */
187 u32 len; /* length of following trace data */
188 u32 cl_id; /* id for set of client per-cpu connections */
189 u32 buf_size; /* client buf_size for this trace */
190 u32 buf_nr; /* client buf_nr for this trace */
191 u32 page_size; /* client page_size for this trace */
195 * Each host encountered has one of these. The head is used to link this
196 * on to the network server's ch_list. Connections associated with this
197 * host are linked on conn_list, and any devices traced on that host
198 * are connected on the devpaths list.
201 struct list_head head;
202 struct list_head conn_list;
203 struct list_head devpaths;
204 struct net_server_s *ns;
206 struct in_addr cl_in_addr;
207 int connects, ndevs, cl_opens;
211 * Each connection (client to server socket ('fd')) has one of these. A
212 * back reference to the host ('ch'), and lists headers (for the host
213 * list, and the network server conn_list) are also included.
216 struct list_head ch_head, ns_head;
223 * The network server requires some poll structures to be maintained -
224 * one per conection currently on conn_list. The nchs/ch_list values
225 * are for each host connected to this server. The addr field is used
226 * for scratch as new connections are established.
228 struct net_server_s {
229 struct list_head conn_list;
230 struct list_head ch_list;
232 int listen_fd, connects, nchs;
233 struct sockaddr_in addr;
237 * This structure is (generically) used to providide information
238 * for a read-to-write set of values.
240 * ifn & ifd represent input information
242 * ofn, ofd, ofp, obuf & mmap_info are used for output file (optionally).
248 struct cl_conn *nc; /* Server network connection */
251 * mmap controlled output files
253 struct mmap_info mmap_info;
256 * Client network fields
259 unsigned long long data_queued;
262 * Input/output file descriptors & names
265 char ifn[MAXPATHLEN + 64];
266 char ofn[MAXPATHLEN + 64];
269 static char blktrace_version[] = "2.0.0";
272 * Linkage to blktrace helper routines (trace conversions)
274 int data_is_native = -1;
279 static cpu_set_t *online_cpus;
281 static int act_mask = ~0U;
282 static int kill_running_trace;
283 static int stop_watch;
284 static int piped_output;
286 static char *debugfs_path = "/sys/kernel/debug";
287 static char *output_name;
288 static char *output_dir;
290 static unsigned long buf_size = BUF_SIZE;
291 static unsigned long buf_nr = BUF_NR;
295 static LIST_HEAD(devpaths);
296 static LIST_HEAD(tracers);
298 static volatile int done;
301 * tracer threads add entries, the main thread takes them off and processes
302 * them. These protect the dp_entries variable.
304 static pthread_cond_t dp_cond = PTHREAD_COND_INITIALIZER;
305 static pthread_mutex_t dp_mutex = PTHREAD_MUTEX_INITIALIZER;
306 static volatile int dp_entries;
309 * These synchronize master / thread interactions.
311 static pthread_cond_t mt_cond = PTHREAD_COND_INITIALIZER;
312 static pthread_mutex_t mt_mutex = PTHREAD_MUTEX_INITIALIZER;
313 static volatile int nthreads_running;
314 static volatile int nthreads_leaving;
315 static volatile int nthreads_error;
316 static volatile int tracers_run;
319 * network cmd line params
321 static struct sockaddr_in hostname_addr;
322 static char hostname[MAXHOSTNAMELEN];
323 static int net_port = TRACE_NET_PORT;
324 static int net_use_sendfile = 1;
328 static int (*handle_pfds)(struct tracer *, int, int);
329 static int (*handle_list)(struct tracer_devpath_head *, struct list_head *);
331 #define S_OPTS "d:a:A:r:o:kw:vVb:n:D:lh:p:sI:"
332 static struct option l_opts[] = {
335 .has_arg = required_argument,
340 .name = "input-devs",
341 .has_arg = required_argument,
347 .has_arg = required_argument,
353 .has_arg = required_argument,
359 .has_arg = required_argument,
365 .has_arg = required_argument,
371 .has_arg = no_argument,
377 .has_arg = required_argument,
383 .has_arg = no_argument,
389 .has_arg = no_argument,
394 .name = "buffer-size",
395 .has_arg = required_argument,
400 .name = "num-sub-buffers",
401 .has_arg = required_argument,
406 .name = "output-dir",
407 .has_arg = required_argument,
413 .has_arg = no_argument,
419 .has_arg = required_argument,
425 .has_arg = required_argument,
430 .name = "no-sendfile",
431 .has_arg = no_argument,
440 static char usage_str[] = "\n\n" \
441 "-d <dev> | --dev=<dev>\n" \
442 "[ -r <debugfs path> | --relay=<debugfs path> ]\n" \
443 "[ -o <file> | --output=<file>]\n" \
444 "[ -D <dir> | --output-dir=<dir>\n" \
445 "[ -w <time> | --stopwatch=<time>]\n" \
446 "[ -a <action field> | --act-mask=<action field>]\n" \
447 "[ -A <action mask> | --set-mask=<action mask>]\n" \
448 "[ -b <size> | --buffer-size]\n" \
449 "[ -n <number> | --num-sub-buffers=<number>]\n" \
450 "[ -l | --listen]\n" \
451 "[ -h <hostname> | --host=<hostname>]\n" \
452 "[ -p <port number> | --port=<port number>]\n" \
453 "[ -s | --no-sendfile]\n" \
454 "[ -I <devs file> | --input-devs=<devs file>]\n" \
455 "[ -v <version> | --version]\n" \
456 "[ -V <version> | --version]\n" \
458 "\t-d Use specified device. May also be given last after options\n" \
459 "\t-r Path to mounted debugfs, defaults to /sys/kernel/debug\n" \
460 "\t-o File(s) to send output to\n" \
461 "\t-D Directory to prepend to output file names\n" \
462 "\t-w Stop after defined time, in seconds\n" \
463 "\t-a Only trace specified actions. See documentation\n" \
464 "\t-A Give trace mask as a single value. See documentation\n" \
465 "\t-b Sub buffer size in KiB (default 512)\n" \
466 "\t-n Number of sub buffers (default 4)\n" \
467 "\t-l Run in network listen mode (blktrace server)\n" \
468 "\t-h Run in network client mode, connecting to the given host\n" \
469 "\t-p Network port to use (default 8462)\n" \
470 "\t-s Make the network client NOT use sendfile() to transfer data\n" \
471 "\t-I Add devices found in <devs file>\n" \
472 "\t-v Print program version info\n" \
473 "\t-V Print program version info\n\n";
/*
 * Reset a pollfd so it neither requests nor reports any events.
 */
static void clear_events(struct pollfd *pfd)
{
	pfd->events = 0;
	pfd->revents = 0;
}
481 static inline int net_client_use_sendfile(void)
483 return net_mode == Net_client && net_use_sendfile;
486 static inline int net_client_use_send(void)
488 return net_mode == Net_client && !net_use_sendfile;
491 static inline int use_tracer_devpaths(void)
493 return piped_output || net_client_use_send();
/*
 * Two IPv4 addresses are equal iff their 32-bit (network-order) values match.
 */
static inline int in_addr_eq(struct in_addr a, struct in_addr b)
{
	return !(a.s_addr ^ b.s_addr);
}
501 static inline void pdc_dr_update(struct devpath *dpp, int cpu, int data_read)
503 dpp->stats[cpu].data_read += data_read;
506 static inline void pdc_nev_update(struct devpath *dpp, int cpu, int nevents)
508 dpp->stats[cpu].nevents += nevents;
511 static void show_usage(char *prog)
513 fprintf(stderr, "Usage: %s %s", prog, usage_str);
517 * Create a timespec 'msec' milliseconds into the future
/*
 * Create a timespec 'delta_msec' milliseconds into the future, suitable
 * for pthread_cond_timedwait() (whose default clock is CLOCK_REALTIME,
 * matching gettimeofday()).
 */
static inline void make_timespec(struct timespec *tsp, long delta_msec)
{
	struct timeval now;

	gettimeofday(&now, NULL);
	tsp->tv_sec = now.tv_sec;
	tsp->tv_nsec = 1000L * now.tv_usec;

	tsp->tv_nsec += (delta_msec * 1000000L);
	/*
	 * Normalize: tv_nsec must land in [0, 1e9). Use >= so that an exact
	 * multiple of a second does not leave tv_nsec == 1000000000, which
	 * would make pthread_cond_timedwait() fail with EINVAL.
	 */
	if (tsp->tv_nsec >= 1000000000L) {
		long secs = tsp->tv_nsec / 1000000000L;

		tsp->tv_sec += secs;
		tsp->tv_nsec -= (secs * 1000000000L);
	}
}
537 * Add a timer to ensure wait ends
/*
 * Bounded condition wait: attach a 50ms timeout so that a missed wakeup
 * can never hang a waiter indefinitely.
 */
static void t_pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex)
{
	struct timespec deadline;

	make_timespec(&deadline, 50);
	pthread_cond_timedwait(cond, mutex, &deadline);
}
547 static void unblock_tracers(void)
549 pthread_mutex_lock(&mt_mutex);
551 pthread_cond_broadcast(&mt_cond);
552 pthread_mutex_unlock(&mt_mutex);
555 static void tracer_wait_unblock(struct tracer *tp)
557 pthread_mutex_lock(&mt_mutex);
558 while (!tp->is_done && !tracers_run)
559 pthread_cond_wait(&mt_cond, &mt_mutex);
560 pthread_mutex_unlock(&mt_mutex);
563 static void tracer_signal_ready(struct tracer *tp,
564 enum thread_status th_status,
567 pthread_mutex_lock(&mt_mutex);
570 if (th_status == Th_running)
572 else if (th_status == Th_error)
577 pthread_cond_signal(&mt_cond);
578 pthread_mutex_unlock(&mt_mutex);
581 static void wait_tracers_ready(int ncpus_started)
583 pthread_mutex_lock(&mt_mutex);
584 while ((nthreads_running + nthreads_error) < ncpus_started)
585 t_pthread_cond_wait(&mt_cond, &mt_mutex);
586 pthread_mutex_unlock(&mt_mutex);
589 static void wait_tracers_leaving(void)
591 pthread_mutex_lock(&mt_mutex);
592 while (nthreads_leaving < nthreads_running)
593 t_pthread_cond_wait(&mt_cond, &mt_mutex);
594 pthread_mutex_unlock(&mt_mutex);
597 static void init_mmap_info(struct mmap_info *mip)
599 mip->buf_size = buf_size;
600 mip->buf_nr = buf_nr;
601 mip->pagesize = pagesize;
/*
 * Orderly shutdown of one network connection: stop both directions,
 * close the descriptor (shutdown alone leaks it), and poison *fd so a
 * caller cannot accidentally reuse the stale value.
 */
static void net_close_connection(int *fd)
{
	shutdown(*fd, SHUT_RDWR);
	close(*fd);
	*fd = -1;
}
611 static void dpp_free(struct devpath *dpp)
620 free(dpp->buts_name);
624 static int lock_on_cpu(int cpu)
626 cpu_set_t * cpu_mask;
629 cpu_mask = CPU_ALLOC(max_cpus);
630 size = CPU_ALLOC_SIZE(max_cpus);
632 CPU_ZERO_S(size, cpu_mask);
633 CPU_SET_S(cpu, size, cpu_mask);
634 if (sched_setaffinity(0, size, cpu_mask) < 0) {
/*
 * Bump the soft limit for 'resource' by 'increase', raising the hard
 * limit too when the new soft limit would reach it. Returns 1 when the
 * new limits were installed, 0 otherwise. On failure errno is restored
 * to its value on entry, so callers can still report the original error
 * that prompted the bump.
 */
static int increase_limit(int resource, rlim_t increase)
{
	int save_errno = errno;
	struct rlimit rlim;

	if (getrlimit(resource, &rlim) == 0) {
		rlim.rlim_cur += increase;
		if (rlim.rlim_cur >= rlim.rlim_max)
			rlim.rlim_max = rlim.rlim_cur + increase;

		if (setrlimit(resource, &rlim) == 0)
			return 1;
	}

	errno = save_errno;
	return 0;
}
/*
 * Called after open(2)/fopen(3)/socket(2) and friends fail: when the
 * failure was descriptor exhaustion, try to raise RLIMIT_NOFILE and
 * tell the caller (non-zero return) to retry the operation.
 */
static int handle_open_failure(void)
{
	if (errno != ENFILE && errno != EMFILE)
		return 0;

	return increase_limit(RLIMIT_NOFILE, 16);
}
668 static int handle_mem_failure(size_t length)
671 return handle_open_failure();
672 else if (errno == ENOMEM)
673 return increase_limit(RLIMIT_MEMLOCK, 2 * length);
/*
 * fopen() wrapper: on failure, try to raise the fd limit and retry.
 */
static FILE *my_fopen(const char *path, const char *mode)
{
	FILE *fp;

	do {
		fp = fopen(path, mode);
	} while (!fp && handle_open_failure());

	return fp;
}
/*
 * open() wrapper: on failure, try to raise the fd limit and retry.
 */
static int my_open(const char *path, int flags)
{
	int fd;

	do {
		fd = open(path, flags);
	} while (fd == -1 && handle_open_failure());

	return fd;
}
699 static int my_socket(int domain, int type, int protocol)
704 fd = socket(domain, type, protocol);
705 } while (fd < 0 && handle_open_failure());
710 static int my_accept(int sockfd, struct sockaddr *addr, socklen_t *addrlen)
715 fd = accept(sockfd, addr, addrlen);
716 } while (fd < 0 && handle_open_failure());
721 static void *my_mmap(void *addr, size_t length, int prot, int flags, int fd,
727 new = mmap(addr, length, prot, flags, fd, offset);
728 } while (new == MAP_FAILED && handle_mem_failure(length));
733 static int my_mlock(struct tracer *tp,
734 const void *addr, size_t len)
739 ret = mlock(addr, len);
740 if ((retry >= 10) && tp && tp->is_done)
743 } while (ret < 0 && handle_mem_failure(len));
748 static int setup_mmap(int fd, unsigned int maxlen,
749 struct mmap_info *mip,
752 if (mip->fs_off + maxlen > mip->fs_buf_len) {
753 unsigned long nr = max(16, mip->buf_nr);
756 munlock(mip->fs_buf, mip->fs_buf_len);
757 munmap(mip->fs_buf, mip->fs_buf_len);
761 mip->fs_off = mip->fs_size & (mip->pagesize - 1);
762 mip->fs_buf_len = (nr * mip->buf_size) - mip->fs_off;
763 mip->fs_max_size += mip->fs_buf_len;
765 if (ftruncate(fd, mip->fs_max_size) < 0) {
766 perror("setup_mmap: ftruncate");
770 mip->fs_buf = my_mmap(NULL, mip->fs_buf_len, PROT_WRITE,
772 mip->fs_size - mip->fs_off);
773 if (mip->fs_buf == MAP_FAILED) {
774 perror("setup_mmap: mmap");
777 if (my_mlock(tp, mip->fs_buf, mip->fs_buf_len) < 0) {
778 perror("setup_mlock: mlock");
786 static int __stop_trace(int fd)
789 * Should be stopped, don't complain if it isn't
791 ioctl(fd, BLKTRACESTOP);
792 return ioctl(fd, BLKTRACETEARDOWN);
795 static int write_data(char *buf, int len)
800 ret = fwrite(buf, len, 1, pfp);
801 if (ferror(pfp) || ret != 1) {
802 if (errno == EINTR) {
807 if (!piped_output || (errno != EPIPE && errno != EBADF)) {
808 fprintf(stderr, "write(%d) failed: %d/%s\n",
809 len, errno, strerror(errno));
823 * Returns the number of bytes read (successfully)
825 static int __net_recv_data(int fd, void *buf, unsigned int len)
827 unsigned int bytes_left = len;
829 while (bytes_left && !done) {
830 int ret = recv(fd, buf, bytes_left, MSG_WAITALL);
835 if (errno == EAGAIN) {
839 perror("server: net_recv_data: recv failed");
847 return len - bytes_left;
850 static int net_recv_data(int fd, void *buf, unsigned int len)
852 return __net_recv_data(fd, buf, len);
856 * Returns number of bytes written
/*
 * Send 'buf_len' bytes over 'fd', looping on short sends. Returns the
 * number of bytes actually handed to the socket (== buf_len unless
 * send() failed part way through).
 */
static int net_send_data(int fd, void *buf, unsigned int buf_len)
{
	unsigned char *p = buf;
	unsigned int bytes_left = buf_len;

	while (bytes_left) {
		int ret = send(fd, p, bytes_left, 0);

		if (ret < 0) {
			perror("send");
			break;
		}

		p += ret;
		bytes_left -= ret;
	}

	return buf_len - bytes_left;
}
877 static int net_send_header(int fd, int cpu, char *buts_name, int len)
879 struct blktrace_net_hdr hdr;
881 memset(&hdr, 0, sizeof(hdr));
883 hdr.magic = BLK_IO_TRACE_MAGIC;
884 memset(hdr.buts_name, 0, sizeof(hdr.buts_name));
885 strncpy(hdr.buts_name, buts_name, sizeof(hdr.buts_name));
886 hdr.buts_name[sizeof(hdr.buts_name) - 1] = '\0';
888 hdr.max_cpus = max_cpus;
890 hdr.cl_id = getpid();
891 hdr.buf_size = buf_size;
893 hdr.page_size = pagesize;
895 return net_send_data(fd, &hdr, sizeof(hdr)) != sizeof(hdr);
898 static void net_send_open_close(int fd, int cpu, char *buts_name, int len)
900 struct blktrace_net_hdr ret_hdr;
902 net_send_header(fd, cpu, buts_name, len);
903 net_recv_data(fd, &ret_hdr, sizeof(ret_hdr));
906 static void net_send_open(int fd, int cpu, char *buts_name)
908 net_send_open_close(fd, cpu, buts_name, 0);
911 static void net_send_close(int fd, char *buts_name, int drops)
914 * Overload CPU w/ number of drops
916 * XXX: Need to clear/set done around call - done=1 (which
917 * is true here) stops reads from happening... :-(
920 net_send_open_close(fd, drops, buts_name, 1);
924 static void ack_open_close(int fd, char *buts_name)
926 net_send_header(fd, 0, buts_name, 2);
929 static void net_send_drops(int fd)
933 __list_for_each(p, &devpaths) {
934 struct devpath *dpp = list_entry(p, struct devpath, head);
936 net_send_close(fd, dpp->buts_name, dpp->drops);
946 static int net_get_header(struct cl_conn *nc, struct blktrace_net_hdr *bnh)
949 int fl = fcntl(nc->fd, F_GETFL);
951 fcntl(nc->fd, F_SETFL, fl | O_NONBLOCK);
952 bytes_read = __net_recv_data(nc->fd, bnh, sizeof(*bnh));
953 fcntl(nc->fd, F_SETFL, fl & ~O_NONBLOCK);
955 if (bytes_read == sizeof(*bnh))
957 else if (bytes_read == 0)
963 static int net_setup_addr(void)
965 struct sockaddr_in *addr = &hostname_addr;
967 memset(addr, 0, sizeof(*addr));
968 addr->sin_family = AF_INET;
969 addr->sin_port = htons(net_port);
971 if (inet_aton(hostname, &addr->sin_addr) != 1) {
972 struct hostent *hent;
974 hent = gethostbyname(hostname);
976 if (h_errno == TRY_AGAIN) {
979 } else if (h_errno == NO_RECOVERY) {
980 fprintf(stderr, "gethostbyname(%s)"
981 "non-recoverable error encountered\n",
985 * HOST_NOT_FOUND, NO_ADDRESS or NO_DATA
987 fprintf(stderr, "Host %s not found\n",
993 memcpy(&addr->sin_addr, hent->h_addr, 4);
994 memset(hostname, 0, sizeof(hostname));
995 strncpy(hostname, hent->h_name, sizeof(hostname));
996 hostname[sizeof(hostname) - 1] = '\0';
1002 static int net_setup_client(void)
1005 struct sockaddr_in *addr = &hostname_addr;
1007 fd = my_socket(AF_INET, SOCK_STREAM, 0);
1009 perror("client: socket");
1013 if (connect(fd, (struct sockaddr *)addr, sizeof(*addr)) < 0) {
1014 if (errno == ECONNREFUSED)
1016 "\nclient: Connection to %s refused, "
1017 "perhaps the server is not started?\n\n",
1020 perror("client: connect");
1029 static int open_client_connections(void)
1032 size_t alloc_size = CPU_ALLOC_SIZE(max_cpus);
1034 cl_fds = calloc(ncpus, sizeof(*cl_fds));
1035 for (cpu = 0; cpu < max_cpus; cpu++) {
1036 if (!CPU_ISSET_S(cpu, alloc_size, online_cpus))
1038 cl_fds[cpu] = net_setup_client();
1039 if (cl_fds[cpu] < 0)
1046 close(cl_fds[cpu--]);
1051 static void close_client_connections(void)
1055 size_t alloc_size = CPU_ALLOC_SIZE(max_cpus);
1057 for (cpu = 0, fdp = cl_fds; cpu < max_cpus; cpu++, fdp++) {
1058 if (!CPU_ISSET_S(cpu, alloc_size, online_cpus))
1061 net_send_drops(*fdp);
1062 net_close_connection(fdp);
1069 static void setup_buts(void)
1071 struct list_head *p;
1073 __list_for_each(p, &devpaths) {
1074 struct blk_user_trace_setup buts;
1075 struct devpath *dpp = list_entry(p, struct devpath, head);
1077 memset(&buts, 0, sizeof(buts));
1078 buts.buf_size = buf_size;
1079 buts.buf_nr = buf_nr;
1080 buts.act_mask = act_mask;
1082 if (ioctl(dpp->fd, BLKTRACESETUP, &buts) >= 0) {
1083 dpp->ncpus = max_cpus;
1084 dpp->buts_name = strdup(buts.name);
1087 dpp->stats = calloc(dpp->ncpus, sizeof(*dpp->stats));
1088 memset(dpp->stats, 0, dpp->ncpus * sizeof(*dpp->stats));
1090 fprintf(stderr, "BLKTRACESETUP(2) %s failed: %d/%s\n",
1091 dpp->path, errno, strerror(errno));
1095 static void start_buts(void)
1097 struct list_head *p;
1099 __list_for_each(p, &devpaths) {
1100 struct devpath *dpp = list_entry(p, struct devpath, head);
1102 if (ioctl(dpp->fd, BLKTRACESTART) < 0) {
1103 fprintf(stderr, "BLKTRACESTART %s failed: %d/%s\n",
1104 dpp->path, errno, strerror(errno));
1109 static int get_drops(struct devpath *dpp)
1112 char fn[MAXPATHLEN + 64], tmp[256];
1114 snprintf(fn, sizeof(fn), "%s/block/%s/dropped", debugfs_path,
1117 fd = my_open(fn, O_RDONLY);
1120 * This may be ok: the kernel may not support
1123 if (errno != ENOENT)
1124 fprintf(stderr, "Could not open %s: %d/%s\n",
1125 fn, errno, strerror(errno));
1127 } else if (read(fd, tmp, sizeof(tmp)) < 0) {
1128 fprintf(stderr, "Could not read %s: %d/%s\n",
1129 fn, errno, strerror(errno));
1137 static void get_all_drops(void)
1139 struct list_head *p;
1141 __list_for_each(p, &devpaths) {
1142 struct devpath *dpp = list_entry(p, struct devpath, head);
1144 dpp->drops = get_drops(dpp);
1148 static inline struct trace_buf *alloc_trace_buf(int cpu, int bufsize)
1150 struct trace_buf *tbp;
1152 tbp = malloc(sizeof(*tbp) + bufsize);
1153 INIT_LIST_HEAD(&tbp->head);
1155 tbp->buf = (void *)(tbp + 1);
1157 tbp->dpp = NULL; /* Will be set when tbp is added */
1162 static void free_tracer_heads(struct devpath *dpp)
1165 struct tracer_devpath_head *hd;
1167 for (cpu = 0, hd = dpp->heads; cpu < max_cpus; cpu++, hd++) {
1171 pthread_mutex_destroy(&hd->mutex);
1176 static int setup_tracer_devpaths(void)
1178 struct list_head *p;
1180 if (net_client_use_send())
1181 if (open_client_connections())
1184 __list_for_each(p, &devpaths) {
1186 struct tracer_devpath_head *hd;
1187 struct devpath *dpp = list_entry(p, struct devpath, head);
1189 dpp->heads = calloc(max_cpus, sizeof(struct tracer_devpath_head));
1190 for (cpu = 0, hd = dpp->heads; cpu < max_cpus; cpu++, hd++) {
1191 INIT_LIST_HEAD(&hd->head);
1192 pthread_mutex_init(&hd->mutex, NULL);
1200 static inline void add_trace_buf(struct devpath *dpp, int cpu,
1201 struct trace_buf **tbpp)
1203 struct trace_buf *tbp = *tbpp;
1204 struct tracer_devpath_head *hd = &dpp->heads[cpu];
1208 pthread_mutex_lock(&hd->mutex);
1209 list_add_tail(&tbp->head, &hd->head);
1210 pthread_mutex_unlock(&hd->mutex);
1212 *tbpp = alloc_trace_buf(cpu, buf_size);
1215 static inline void incr_entries(int entries_handled)
1217 pthread_mutex_lock(&dp_mutex);
1218 if (dp_entries == 0)
1219 pthread_cond_signal(&dp_cond);
1220 dp_entries += entries_handled;
1221 pthread_mutex_unlock(&dp_mutex);
1224 static void decr_entries(int handled)
1226 pthread_mutex_lock(&dp_mutex);
1227 dp_entries -= handled;
1228 pthread_mutex_unlock(&dp_mutex);
1231 static int wait_empty_entries(void)
1233 pthread_mutex_lock(&dp_mutex);
1234 while (!done && dp_entries == 0)
1235 t_pthread_cond_wait(&dp_cond, &dp_mutex);
1236 pthread_mutex_unlock(&dp_mutex);
1241 static int add_devpath(char *path)
1244 struct devpath *dpp;
1245 struct list_head *p;
1248 * Verify device is not duplicated
1250 __list_for_each(p, &devpaths) {
1251 struct devpath *tmp = list_entry(p, struct devpath, head);
1252 if (!strcmp(tmp->path, path))
1256 * Verify device is valid before going too far
1258 fd = my_open(path, O_RDONLY | O_NONBLOCK);
1260 fprintf(stderr, "Invalid path %s specified: %d/%s\n",
1261 path, errno, strerror(errno));
1265 dpp = malloc(sizeof(*dpp));
1266 memset(dpp, 0, sizeof(*dpp));
1267 dpp->path = strdup(path);
1270 list_add_tail(&dpp->head, &devpaths);
1275 static void rel_devpaths(void)
1277 struct list_head *p, *q;
1279 list_for_each_safe(p, q, &devpaths) {
1280 struct devpath *dpp = list_entry(p, struct devpath, head);
1282 list_del(&dpp->head);
1283 __stop_trace(dpp->fd);
1287 free_tracer_heads(dpp);
1294 static int flush_subbuf_net(struct trace_buf *tbp)
1296 int fd = cl_fds[tbp->cpu];
1297 struct devpath *dpp = tbp->dpp;
1299 if (net_send_header(fd, tbp->cpu, dpp->buts_name, tbp->len))
1301 else if (net_send_data(fd, tbp->buf, tbp->len) != tbp->len)
1308 handle_list_net(__attribute__((__unused__))struct tracer_devpath_head *hd,
1309 struct list_head *list)
1311 struct trace_buf *tbp;
1312 struct list_head *p, *q;
1313 int entries_handled = 0;
1315 list_for_each_safe(p, q, list) {
1316 tbp = list_entry(p, struct trace_buf, head);
1318 list_del(&tbp->head);
1321 if (cl_fds[tbp->cpu] >= 0) {
1322 if (flush_subbuf_net(tbp)) {
1323 close(cl_fds[tbp->cpu]);
1324 cl_fds[tbp->cpu] = -1;
1331 return entries_handled;
1335 * Tack 'tbp's buf onto the tail of 'prev's buf
1337 static struct trace_buf *tb_combine(struct trace_buf *prev,
1338 struct trace_buf *tbp)
1340 unsigned long tot_len;
1342 tot_len = prev->len + tbp->len;
1343 if (tot_len > buf_size) {
1345 * tbp->head isn't connected (it was 'prev'
1346 * so it had been taken off of the list
1347 * before). Therefore, we can realloc
1348 * the whole structures, as the other fields
1351 prev = realloc(prev, sizeof(*prev) + tot_len);
1352 prev->buf = (void *)(prev + 1);
1355 memcpy(prev->buf + prev->len, tbp->buf, tbp->len);
1356 prev->len = tot_len;
1362 static int handle_list_file(struct tracer_devpath_head *hd,
1363 struct list_head *list)
1365 int off, t_len, nevents;
1366 struct blk_io_trace *t;
1367 struct list_head *p, *q;
1368 int entries_handled = 0;
1369 struct trace_buf *tbp, *prev;
1372 list_for_each_safe(p, q, list) {
1373 tbp = list_entry(p, struct trace_buf, head);
1374 list_del(&tbp->head);
1378 * If there was some leftover before, tack this new
1379 * entry onto the tail of the previous one.
1382 tbp = tb_combine(prev, tbp);
1385 * See how many whole traces there are - send them
1386 * all out in one go.
1390 while (off + (int)sizeof(*t) <= tbp->len) {
1391 t = (struct blk_io_trace *)(tbp->buf + off);
1392 t_len = sizeof(*t) + t->pdu_len;
1393 if (off + t_len > tbp->len)
1400 pdc_nev_update(tbp->dpp, tbp->cpu, nevents);
1403 * Write any full set of traces, any remaining data is kept
1404 * for the next pass.
1407 if (write_data(tbp->buf, off) || off == tbp->len) {
1413 * Move valid data to beginning of buffer
1416 memmove(tbp->buf, tbp->buf + off, tbp->len);
1424 return entries_handled;
1427 static void __process_trace_bufs(void)
1430 struct list_head *p;
1431 struct list_head list;
1434 __list_for_each(p, &devpaths) {
1435 struct devpath *dpp = list_entry(p, struct devpath, head);
1436 struct tracer_devpath_head *hd = dpp->heads;
1438 for (cpu = 0; cpu < max_cpus; cpu++, hd++) {
1439 pthread_mutex_lock(&hd->mutex);
1440 if (list_empty(&hd->head)) {
1441 pthread_mutex_unlock(&hd->mutex);
1445 list_replace_init(&hd->head, &list);
1446 pthread_mutex_unlock(&hd->mutex);
1448 handled += handle_list(hd, &list);
1453 decr_entries(handled);
1456 static void process_trace_bufs(void)
1458 while (wait_empty_entries())
1459 __process_trace_bufs();
1462 static void clean_trace_bufs(void)
1465 * No mutex needed here: we're only reading from the lists,
1469 __process_trace_bufs();
1472 static inline void read_err(int cpu, char *ifn)
1474 if (errno != EAGAIN)
1475 fprintf(stderr, "Thread %d failed read of %s: %d/%s\n",
1476 cpu, ifn, errno, strerror(errno));
1479 static int net_sendfile(struct io_info *iop)
1483 ret = sendfile(iop->ofd, iop->ifd, NULL, iop->ready);
1487 } else if (ret < (int)iop->ready) {
1488 fprintf(stderr, "short sendfile send (%d of %d)\n",
1496 static inline int net_sendfile_data(struct tracer *tp, struct io_info *iop)
1498 struct devpath *dpp = iop->dpp;
1500 if (net_send_header(iop->ofd, tp->cpu, dpp->buts_name, iop->ready))
1502 return net_sendfile(iop);
1505 static int fill_ofname(struct io_info *iop, int cpu)
1509 char *dst = iop->ofn;
1512 len = snprintf(iop->ofn, sizeof(iop->ofn), "%s/", output_dir);
1514 len = snprintf(iop->ofn, sizeof(iop->ofn), "./");
1516 if (net_mode == Net_server) {
1517 struct cl_conn *nc = iop->nc;
1519 len += sprintf(dst + len, "%s-", nc->ch->hostname);
1520 len += strftime(dst + len, 64, "%F-%T/",
1521 gmtime(&iop->dpp->cl_connect_time));
1524 if (stat(iop->ofn, &sb) < 0) {
1525 if (errno != ENOENT) {
1527 "Destination dir %s stat failed: %d/%s\n",
1528 iop->ofn, errno, strerror(errno));
1532 * There is no synchronization between multiple threads
1533 * trying to create the directory at once. It's harmless
1534 * to let them try, so just detect the problem and move on.
1536 if (mkdir(iop->ofn, 0755) < 0 && errno != EEXIST) {
1538 "Destination dir %s can't be made: %d/%s\n",
1539 iop->ofn, errno, strerror(errno));
1545 snprintf(iop->ofn + len, sizeof(iop->ofn), "%s.blktrace.%d",
1548 snprintf(iop->ofn + len, sizeof(iop->ofn), "%s.blktrace.%d",
1549 iop->dpp->buts_name, cpu);
1554 static int set_vbuf(struct io_info *iop, int mode, size_t size)
1556 iop->obuf = malloc(size);
1557 if (setvbuf(iop->ofp, iop->obuf, mode, size) < 0) {
1558 fprintf(stderr, "setvbuf(%s, %d) failed: %d/%s\n",
1559 iop->dpp->path, (int)size, errno,
1568 static int iop_open(struct io_info *iop, int cpu)
1571 if (fill_ofname(iop, cpu))
1574 iop->ofp = my_fopen(iop->ofn, "w+");
1575 if (iop->ofp == NULL) {
1576 fprintf(stderr, "Open output file %s failed: %d/%s\n",
1577 iop->ofn, errno, strerror(errno));
1581 if (set_vbuf(iop, _IOLBF, FILE_VBUF_SIZE)) {
1582 fprintf(stderr, "set_vbuf for file %s failed: %d/%s\n",
1583 iop->ofn, errno, strerror(errno));
1588 iop->ofd = fileno(iop->ofp);
1592 static void close_iop(struct io_info *iop)
1594 struct mmap_info *mip = &iop->mmap_info;
1597 munmap(mip->fs_buf, mip->fs_buf_len);
1599 if (!piped_output) {
1600 if (ftruncate(fileno(iop->ofp), mip->fs_size) < 0) {
1602 "Ignoring err: ftruncate(%s): %d/%s\n",
1603 iop->ofn, errno, strerror(errno));
1613 static void close_ios(struct tracer *tp)
1615 while (tp->nios > 0) {
1616 struct io_info *iop = &tp->ios[--tp->nios];
1618 iop->dpp->drops = get_drops(iop->dpp);
1624 else if (iop->ofd >= 0) {
1625 struct devpath *dpp = iop->dpp;
1627 net_send_close(iop->ofd, dpp->buts_name, dpp->drops);
1628 net_close_connection(&iop->ofd);
1636 static int open_ios(struct tracer *tp)
1639 struct io_info *iop;
1640 struct list_head *p;
1642 tp->ios = calloc(ndevs, sizeof(struct io_info));
1643 memset(tp->ios, 0, ndevs * sizeof(struct io_info));
1645 tp->pfds = calloc(ndevs, sizeof(struct pollfd));
1646 memset(tp->pfds, 0, ndevs * sizeof(struct pollfd));
1651 __list_for_each(p, &devpaths) {
1652 struct devpath *dpp = list_entry(p, struct devpath, head);
1656 snprintf(iop->ifn, sizeof(iop->ifn), "%s/block/%s/trace%d",
1657 debugfs_path, dpp->buts_name, tp->cpu);
1659 iop->ifd = my_open(iop->ifn, O_RDONLY | O_NONBLOCK);
1661 fprintf(stderr, "Thread %d failed open %s: %d/%s\n",
1662 tp->cpu, iop->ifn, errno, strerror(errno));
1666 init_mmap_info(&iop->mmap_info);
1669 pfd->events = POLLIN;
1673 else if (net_client_use_sendfile()) {
1674 iop->ofd = net_setup_client();
1677 net_send_open(iop->ofd, tp->cpu, dpp->buts_name);
1678 } else if (net_mode == Net_none) {
1679 if (iop_open(iop, tp->cpu))
1683 * This ensures that the server knows about all
1684 * connections & devices before _any_ closes
1686 net_send_open(cl_fds[tp->cpu], tp->cpu, dpp->buts_name);
1697 close(iop->ifd); /* tp->nios _not_ bumped */
/*
 * handle_pfds_file() - poll-event handler for local file output.
 * For each device with data ready (or unconditionally when force_read
 * is set, used at end-of-run drain), grow the output mmap window as
 * needed and read() relay data directly into it.
 * Returns the number of entries handled (accumulation lines elided).
 */
1702 static int handle_pfds_file(struct tracer *tp, int nevs, int force_read)
1704 struct mmap_info *mip;
1705 int i, ret, nentries = 0;
1706 struct pollfd *pfd = tp->pfds;
1707 struct io_info *iop = tp->ios;
1709 for (i = 0; nevs > 0 && i < ndevs; i++, pfd++, iop++) {
1710 if (pfd->revents & POLLIN || force_read) {
1711 mip = &iop->mmap_info;
/* Ensure the mmap'ed output region can hold another buf_size read. */
1713 ret = setup_mmap(iop->ofd, buf_size, mip, tp);
1719 ret = read(iop->ifd, mip->fs_buf + mip->fs_off,
/* Account bytes read into per-device/per-CPU stats. */
1722 pdc_dr_update(iop->dpp, tp->cpu, ret);
1723 mip->fs_size += ret;
1726 } else if (ret == 0) {
1728 * Short reads after we're done stop us
1729 * from trying reads.
1734 read_err(tp->cpu, iop->ifn);
/* EAGAIN on a non-blocking relay fd is normal while tracing runs. */
1735 if (errno != EAGAIN || tp->is_done)
/*
 * handle_pfds_netclient() - poll-event handler for the sendfile-based
 * network client.  Uses fstat() on the relay file to learn how much new
 * data is available, then ships it to the server via sendfile().
 */
1745 static int handle_pfds_netclient(struct tracer *tp, int nevs, int force_read)
1748 int i, nentries = 0;
1749 struct pollfd *pfd = tp->pfds;
1750 struct io_info *iop = tp->ios;
1752 for (i = 0; i < ndevs; i++, pfd++, iop++) {
1753 if (pfd->revents & POLLIN || force_read) {
1754 if (fstat(iop->ifd, &sb) < 0) {
/* Only act when the relay file has grown past what we already queued. */
1757 } else if (sb.st_size > (off_t)iop->data_queued) {
1758 iop->ready = sb.st_size - iop->data_queued;
1759 iop->data_queued = sb.st_size;
1761 if (!net_sendfile_data(tp, iop)) {
1762 pdc_dr_update(iop->dpp, tp->cpu,
1774 incr_entries(nentries);
/*
 * handle_pfds_entries() - poll-event handler for piped output and the
 * send()-based network client.  Reads relay data into heap trace_buf
 * entries which are queued (add_trace_buf) for the processing thread.
 */
1779 static int handle_pfds_entries(struct tracer *tp, int nevs, int force_read)
1781 int i, nentries = 0;
1782 struct trace_buf *tbp;
1783 struct pollfd *pfd = tp->pfds;
1784 struct io_info *iop = tp->ios;
1786 tbp = alloc_trace_buf(tp->cpu, buf_size);
1787 for (i = 0; i < ndevs; i++, pfd++, iop++) {
1788 if (pfd->revents & POLLIN || force_read) {
1789 tbp->len = read(iop->ifd, tbp->buf, buf_size);
1791 pdc_dr_update(iop->dpp, tp->cpu, tbp->len);
/* Hand the filled buffer off; &tbp lets the callee swap in a fresh one. */
1792 add_trace_buf(iop->dpp, tp->cpu, &tbp);
1794 } else if (tbp->len == 0) {
1796 * Short reads after we're done stop us
1797 * from trying reads.
1802 read_err(tp->cpu, iop->ifn);
1803 if (errno != EAGAIN || tp->is_done)
/* Piped output ignores the nevs budget; others stop when it hits 0. */
1806 if (!piped_output && --nevs == 0)
1813 incr_entries(nentries);
/*
 * thread_main() - body of each per-CPU tracer thread.  Pins itself to
 * its CPU, signals readiness, waits for the global unblock, then polls
 * all device relay fds until told to stop; finally drains remaining
 * data with forced reads and signals that it is leaving.
 */
1818 static void *thread_main(void *arg)
1820 int ret, ndone, to_val;
1821 struct tracer *tp = arg;
1823 ret = lock_on_cpu(tp->cpu);
/* Shorter poll timeout when output handling wants frequent wakeups. */
1832 to_val = 50; /* Frequent partial handles */
1834 to_val = 500; /* 1/2 second intervals */
1837 tracer_signal_ready(tp, Th_running, 0);
1838 tracer_wait_unblock(tp);
1840 while (!tp->is_done) {
1841 ndone = poll(tp->pfds, ndevs, to_val);
/* Piped output calls the handler even on timeout (ndone == 0). */
1842 if (ndone || piped_output)
1843 (void)handle_pfds(tp, ndone, piped_output);
1844 else if (ndone < 0 && errno != EINTR)
1845 fprintf(stderr, "Thread %d poll failed: %d/%s\n",
1846 tp->cpu, errno, strerror(errno));
1850 * Trace is stopped, pull data until we get a short read
1852 while (handle_pfds(tp, ndevs, 1) > 0)
1856 tracer_signal_ready(tp, Th_leaving, 0);
/* Error path (elided above jumps here): report failure status. */
1860 tracer_signal_ready(tp, Th_error, ret);
/*
 * start_tracer() - allocate a tracer struct for the given CPU and spawn
 * its thread; on success the tracer is appended to the global list.
 * NOTE(review): malloc result check and the tp->cpu assignment are in
 * elided lines; pthread_create does not set errno, so the errno shown
 * in the failure message may be stale.
 */
1864 static int start_tracer(int cpu)
1868 tp = malloc(sizeof(*tp));
1869 memset(tp, 0, sizeof(*tp));
1871 INIT_LIST_HEAD(&tp->head);
1875 if (pthread_create(&tp->thread, NULL, thread_main, tp)) {
1876 fprintf(stderr, "FAILED to start thread on CPU %d: %d/%s\n",
1877 cpu, errno, strerror(errno));
1882 list_add_tail(&tp->head, &tracers);
/*
 * start_tracers() - spawn one tracer thread per online CPU, wait until
 * all report ready, then surface any per-thread startup errors.
 */
1886 static void start_tracers(void)
1888 int cpu, started = 0;
1889 struct list_head *p;
1890 size_t alloc_size = CPU_ALLOC_SIZE(max_cpus);
1892 for (cpu = 0; cpu < max_cpus; cpu++) {
/* Skip CPUs not present in the online set parsed at startup. */
1893 if (!CPU_ISSET_S(cpu, alloc_size, online_cpus))
1895 if (start_tracer(cpu))
1900 wait_tracers_ready(started);
1902 __list_for_each(p, &tracers) {
1903 struct tracer *tp = list_entry(p, struct tracer, head);
1906 "FAILED to start thread on CPU %d: %d/%s\n",
1907 tp->cpu, tp->status, strerror(tp->status));
/*
 * stop_tracers() - stop kernel-side tracing on every device
 * (BLKTRACESTOP), then flag each tracer thread done and wake them all.
 */
1911 static void stop_tracers(void)
1913 struct list_head *p;
1916 * Stop the tracing - makes the tracer threads clean up quicker.
1918 __list_for_each(p, &devpaths) {
1919 struct devpath *dpp = list_entry(p, struct devpath, head);
/* Best-effort: result deliberately ignored. */
1920 (void)ioctl(dpp->fd, BLKTRACESTOP);
1924 * Tell each tracer to quit
1926 __list_for_each(p, &tracers) {
1927 struct tracer *tp = list_entry(p, struct tracer, head);
/* Wake any tracer blocked on the shared condition variable. */
1930 pthread_cond_broadcast(&mt_cond);
/*
 * del_tracers() - unlink and release every tracer struct; uses the
 * _safe list walk because entries are removed while iterating.
 */
1933 static void del_tracers(void)
1935 struct list_head *p, *q;
1937 list_for_each_safe(p, q, &tracers) {
1938 struct tracer *tp = list_entry(p, struct tracer, head);
1940 list_del(&tp->head);
/*
 * wait_tracers() - drain queued trace buffers (piped-output mode), wait
 * for every tracer thread to announce it is leaving, then join them.
 */
1945 static void wait_tracers(void)
1947 struct list_head *p;
1949 if (use_tracer_devpaths())
1950 process_trace_bufs();
1952 wait_tracers_leaving();
1954 __list_for_each(p, &tracers) {
1956 struct tracer *tp = list_entry(p, struct tracer, head);
1958 ret = pthread_join(tp->thread, NULL);
1960 fprintf(stderr, "Thread join %d failed %d\n",
/* Final flush of any remaining buffered trace data. */
1964 if (use_tracer_devpaths())
/*
 * exit_tracing() - atexit handler: mask further termination signals so
 * cleanup (in elided lines below) cannot be re-entered mid-teardown.
 */
1970 static void exit_tracing(void)
1972 signal(SIGINT, SIG_IGN);
1973 signal(SIGHUP, SIG_IGN);
1974 signal(SIGTERM, SIG_IGN);
1975 signal(SIGALRM, SIG_IGN);
/*
 * handle_sigint() - signal handler for INT/HUP/TERM/ALRM; body elided
 * in this excerpt (presumably flags the run done — confirm in full
 * source).
 */
1983 static void handle_sigint(__attribute__((__unused__)) int sig)
/*
 * show_stats() - print per-device, per-CPU event/byte totals for every
 * devpath on the list, then an overall dropped-event warning if any
 * events were lost.  Output goes to ofp (stderr or /dev/null depending
 * on elided selection logic).
 */
1989 static void show_stats(struct list_head *devpaths)
1992 struct list_head *p;
1993 unsigned long long nevents, data_read;
1994 unsigned long long total_drops = 0;
1995 unsigned long long total_events = 0;
1998 ofp = my_fopen("/dev/null", "w");
2002 __list_for_each(p, devpaths) {
2004 struct pdc_stats *sp;
2005 struct devpath *dpp = list_entry(p, struct devpath, head);
2007 if (net_mode == Net_server)
2008 printf("server: end of run for %s:%s\n",
2009 dpp->ch->hostname, dpp->buts_name);
2014 fprintf(ofp, "=== %s ===\n", dpp->buts_name);
2015 for (cpu = 0, sp = dpp->stats; cpu < dpp->ncpus; cpu++, sp++) {
2017 * Estimate events if not known...
2019 if (sp->nevents == 0) {
/* Approximate: bytes read divided by the fixed trace record size. */
2020 sp->nevents = sp->data_read /
2021 sizeof(struct blk_io_trace);
2025 " CPU%3d: %20llu events, %8llu KiB data\n",
2026 cpu, sp->nevents, (sp->data_read + 1023) >> 10);
2028 data_read += sp->data_read;
2029 nevents += sp->nevents;
2032 fprintf(ofp, " Total: %20llu events (dropped %llu),"
2033 " %8llu KiB data\n", nevents,
/* NOTE(review): rounds with +1024 here but +1023 in the per-CPU line
 * above — inconsistent KiB round-up; likely should be +1023 too. */
2034 dpp->drops, (data_read + 1024) >> 10);
2036 total_drops += dpp->drops;
2037 total_events += (nevents + dpp->drops);
2045 double drops_ratio = 1.0;
2048 drops_ratio = (double)total_drops/(double)total_events;
2050 fprintf(stderr, "\nYou have %llu (%5.1lf%%) dropped events\n"
2051 "Consider using a larger buffer size (-b) "
2052 "and/or more buffers (-n)\n",
2053 total_drops, 100.0 * drops_ratio);
/*
 * handle_args() - parse command-line options, validate the debugfs
 * mount, and select the poll-event handler (handle_pfds) appropriate
 * for the chosen output mode (net sendfile / net send / pipe / file).
 * Returns 0 on success, non-zero on any usage or validation error
 * (option characters and several error branches are elided here).
 */
2057 static int handle_args(int argc, char *argv[])
2061 int act_mask_tmp = 0;
2063 while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) >= 0) {
2066 i = find_mask_map(optarg);
2068 fprintf(stderr, "Invalid action mask %s\n",
2076 if ((sscanf(optarg, "%x", &i) != 1) ||
2077 !valid_act_opt(i)) {
2079 "Invalid set action mask %s/0x%x\n",
2087 if (add_devpath(optarg) != 0)
/* -I: read device names, one per line, from a file. */
2093 FILE *ifp = my_fopen(optarg, "r");
2097 "Invalid file for devices %s\n",
2102 while (fscanf(ifp, "%s\n", dev_line) == 1) {
2103 if (add_devpath(dev_line) != 0) {
2113 debugfs_path = optarg;
2117 output_name = optarg;
2120 kill_running_trace = 1;
2123 stop_watch = atoi(optarg);
2124 if (stop_watch <= 0) {
2126 "Invalid stopwatch value (%d secs)\n",
2133 printf("%s version %s\n", argv[0], blktrace_version);
2137 buf_size = strtoul(optarg, NULL, 10);
/* Buffer size is given in KiB; capped at 16 MiB worth. */
2138 if (buf_size <= 0 || buf_size > 16*1024) {
2139 fprintf(stderr, "Invalid buffer size (%lu)\n",
2146 buf_nr = strtoul(optarg, NULL, 10);
2149 "Invalid buffer nr (%lu)\n", buf_nr);
2154 output_dir = optarg;
2157 net_mode = Net_client;
2158 memset(hostname, 0, sizeof(hostname));
/* strncpy may not NUL-terminate; the next line guarantees it. */
2159 strncpy(hostname, optarg, sizeof(hostname));
2160 hostname[sizeof(hostname) - 1] = '\0';
2163 net_mode = Net_server;
2166 net_port = atoi(optarg);
2169 net_use_sendfile = 0;
2172 show_usage(argv[0]);
/* Remaining non-option arguments are device paths. */
2178 while (optind < argc)
2179 if (add_devpath(argv[optind++]) != 0)
2182 if (net_mode != Net_server && ndevs == 0) {
2183 show_usage(argv[0]);
2187 if (statfs(debugfs_path, &st) < 0) {
2188 fprintf(stderr, "Invalid debug path %s: %d/%s\n",
2189 debugfs_path, errno, strerror(errno));
2193 if (st.f_type != (long)DEBUGFS_TYPE) {
2194 fprintf(stderr, "Debugfs is not mounted at %s\n", debugfs_path);
2198 if (act_mask_tmp != 0)
2199 act_mask = act_mask_tmp;
2201 if (net_mode == Net_client && net_setup_addr())
2205 * Set up for appropriate PFD handler based upon output name.
2207 if (net_client_use_sendfile())
2208 handle_pfds = handle_pfds_netclient;
2209 else if (net_client_use_send())
2210 handle_pfds = handle_pfds_entries;
2211 else if (output_name && (strcmp(output_name, "-") == 0)) {
2213 handle_pfds = handle_pfds_entries;
/* Unbuffered stdout so piped consumers see traces immediately. */
2215 if (setvbuf(pfp, NULL, _IONBF, 0)) {
2216 perror("setvbuf stdout");
2220 handle_pfds = handle_pfds_file;
/*
 * ch_add_connection() - register a newly accepted client connection:
 * link it onto both the host's and the server's connection lists and
 * grow the server's pollfd array by one slot.
 * NOTE(review): realloc return is assigned directly back to ns->pfds;
 * error handling (if any) is in elided lines.
 */
2224 static void ch_add_connection(struct net_server_s *ns, struct cl_host *ch,
2229 nc = malloc(sizeof(*nc));
2230 memset(nc, 0, sizeof(*nc));
2232 time(&nc->connect_time);
2237 list_add_tail(&nc->ch_head, &ch->conn_list);
2240 list_add_tail(&nc->ns_head, &ns->conn_list);
/* +1 accounts for pfds[0], the listen socket. */
2242 ns->pfds = realloc(ns->pfds, (ns->connects+1) * sizeof(struct pollfd));
/*
 * ch_rem_connection() - inverse of ch_add_connection: close the socket,
 * unlink from both lists, and shrink the server's pollfd array.
 */
2245 static void ch_rem_connection(struct net_server_s *ns, struct cl_host *ch,
2248 net_close_connection(&nc->fd);
2250 list_del(&nc->ch_head);
2253 list_del(&nc->ns_head);
2255 ns->pfds = realloc(ns->pfds, (ns->connects+1) * sizeof(struct pollfd));
/*
 * net_find_client_host() - look up a client host record by IPv4
 * address; returns the match or (via elided lines) NULL if none.
 */
2260 static struct cl_host *net_find_client_host(struct net_server_s *ns,
2261 struct in_addr cl_in_addr)
2263 struct list_head *p;
2265 __list_for_each(p, &ns->ch_list) {
2266 struct cl_host *ch = list_entry(p, struct cl_host, head);
2268 if (in_addr_eq(ch->cl_in_addr, cl_in_addr))
/*
 * net_add_client_host() - create and register a cl_host record for a
 * first-time client address; records a printable hostname via
 * inet_ntoa() and initializes the host's connection/devpath lists.
 */
2275 static struct cl_host *net_add_client_host(struct net_server_s *ns,
2276 struct sockaddr_in *addr)
2280 ch = malloc(sizeof(*ch));
2281 memset(ch, 0, sizeof(*ch));
2284 ch->cl_in_addr = addr->sin_addr;
2285 list_add_tail(&ch->head, &ns->ch_list);
/* strdup needed: inet_ntoa returns a static buffer. */
2288 ch->hostname = strdup(inet_ntoa(addr->sin_addr));
2289 printf("server: connection from %s\n", ch->hostname);
2291 INIT_LIST_HEAD(&ch->conn_list);
2292 INIT_LIST_HEAD(&ch->devpaths);
/*
 * device_done() - server side: close every per-CPU io channel of a
 * devpath and unlink it (final frees are in elided lines).
 */
2297 static void device_done(struct devpath *dpp, int ncpus)
2300 struct io_info *iop;
2302 for (cpu = 0, iop = dpp->ios; cpu < ncpus; cpu++, iop++)
2305 list_del(&dpp->head);
/*
 * net_ch_remove() - tear down a client host: finish all its devpaths,
 * drop all its connections, then unlink the host record itself.
 */
2309 static void net_ch_remove(struct cl_host *ch, int ncpus)
2311 struct list_head *p, *q;
2312 struct net_server_s *ns = ch->ns;
2314 list_for_each_safe(p, q, &ch->devpaths) {
2315 struct devpath *dpp = list_entry(p, struct devpath, head);
2316 device_done(dpp, ncpus);
2319 list_for_each_safe(p, q, &ch->conn_list) {
2320 struct cl_conn *nc = list_entry(p, struct cl_conn, ch_head);
2322 ch_rem_connection(ns, ch, nc);
2325 list_del(&ch->head);
/*
 * net_add_connection() - accept a pending connection on the listen
 * socket, find or create the cl_host for the peer address, and attach
 * the new connection to it.
 */
2333 static void net_add_connection(struct net_server_s *ns)
2337 socklen_t socklen = sizeof(ns->addr);
2339 fd = my_accept(ns->listen_fd, (struct sockaddr *)&ns->addr, &socklen);
2342 * This is OK: we just won't accept this connection,
2347 ch = net_find_client_host(ns, ns->addr.sin_addr);
2349 ch = net_add_client_host(ns, &ns->addr);
2351 ch_add_connection(ns, ch, fd);
/*
 * nc_add_dpp() - server side: create a devpath for a device a client
 * just announced, allocate per-CPU stats and io_info arrays, and open
 * an output channel per CPU.  On a partial failure, already-opened
 * channels are unwound in reverse.
 */
2355 static struct devpath *nc_add_dpp(struct cl_conn *nc,
2356 struct blktrace_net_hdr *bnh,
2357 time_t connect_time)
2360 struct io_info *iop;
2361 struct devpath *dpp;
2363 dpp = malloc(sizeof(*dpp));
2364 memset(dpp, 0, sizeof(*dpp));
2366 dpp->buts_name = strdup(bnh->buts_name);
2367 dpp->path = strdup(bnh->buts_name);
2370 dpp->cl_id = bnh->cl_id;
2371 dpp->cl_connect_time = connect_time;
2372 dpp->ncpus = nc->ncpus;
2373 dpp->stats = calloc(dpp->ncpus, sizeof(*dpp->stats));
2374 memset(dpp->stats, 0, dpp->ncpus * sizeof(*dpp->stats));
2376 list_add_tail(&dpp->head, &nc->ch->devpaths);
2379 dpp->ios = calloc(nc->ncpus, sizeof(*iop));
/* NOTE(review): memset sizes by the global ndevs while calloc used
 * nc->ncpus — inconsistent (and redundant: calloc already zeroes).
 * Should use nc->ncpus; cannot fix safely from this partial view. */
2380 memset(dpp->ios, 0, ndevs * sizeof(*iop));
2382 for (cpu = 0, iop = dpp->ios; cpu < nc->ncpus; cpu++, iop++) {
2385 init_mmap_info(&iop->mmap_info);
2387 if (iop_open(iop, cpu))
2395 * Need to unravel what's been done...
/* Close channels opened so far, walking back from the failing cpu. */
2398 close_iop(&dpp->ios[cpu--]);
/*
 * nc_find_dpp() - find the devpath matching a received header's
 * buts_name; if absent, create it (nc_add_dpp), reusing the connect
 * time of an existing devpath from the same client id when available.
 */
2404 static struct devpath *nc_find_dpp(struct cl_conn *nc,
2405 struct blktrace_net_hdr *bnh)
2407 struct list_head *p;
2408 time_t connect_time = nc->connect_time;
2410 __list_for_each(p, &nc->ch->devpaths) {
2411 struct devpath *dpp = list_entry(p, struct devpath, head);
2413 if (!strcmp(dpp->buts_name, bnh->buts_name))
2416 if (dpp->cl_id == bnh->cl_id)
2417 connect_time = dpp->cl_connect_time;
2420 return nc_add_dpp(nc, bnh, connect_time);
/*
 * net_client_read_data() - server side: receive bnh->len bytes of trace
 * data for the given CPU directly into the devpath's mmap'ed output
 * window, growing the window first via setup_mmap().
 */
2423 static void net_client_read_data(struct cl_conn *nc, struct devpath *dpp,
2424 struct blktrace_net_hdr *bnh)
2427 struct io_info *iop = &dpp->ios[bnh->cpu];
2428 struct mmap_info *mip = &iop->mmap_info;
2430 if (setup_mmap(iop->ofd, bnh->len, &iop->mmap_info, NULL)) {
2431 fprintf(stderr, "ncd(%s:%d): mmap failed\n",
2432 nc->ch->hostname, nc->fd);
2436 ret = net_recv_data(nc->fd, mip->fs_buf + mip->fs_off, bnh->len);
2438 pdc_dr_update(dpp, bnh->cpu, ret);
2439 mip->fs_size += ret;
2446 * Returns 1 if we closed a host - invalidates other polling information
2447 * that may be present.
/*
 * net_client_data() - server side: read one protocol header from a
 * connection, byte-swap it if the client has different endianness,
 * then dispatch: len==0 opens a device, len==1 ends a run (cpu field
 * carries the drop count), anything else is a data payload.
 */
2449 static int net_client_data(struct cl_conn *nc)
2452 struct devpath *dpp;
2453 struct blktrace_net_hdr bnh;
2455 ret = net_get_header(nc, &bnh);
2460 fprintf(stderr, "ncd(%d): header read failed\n", nc->fd);
/* First header decides endianness for the whole session. */
2464 if (data_is_native == -1 && check_data_endianness(bnh.magic)) {
2465 fprintf(stderr, "ncd(%d): received data is bad\n", nc->fd);
2469 if (!data_is_native) {
2470 bnh.magic = be32_to_cpu(bnh.magic);
2471 bnh.cpu = be32_to_cpu(bnh.cpu);
2472 bnh.max_cpus = be32_to_cpu(bnh.max_cpus);
2473 bnh.len = be32_to_cpu(bnh.len);
2474 bnh.cl_id = be32_to_cpu(bnh.cl_id);
2475 bnh.buf_size = be32_to_cpu(bnh.buf_size);
2476 bnh.buf_nr = be32_to_cpu(bnh.buf_nr);
2477 bnh.page_size = be32_to_cpu(bnh.page_size);
/* Low byte of the magic is the protocol version; mask it off. */
2480 if ((bnh.magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) {
2481 fprintf(stderr, "ncd(%s:%d): bad data magic\n",
2482 nc->ch->hostname, nc->fd);
2486 if (nc->ncpus == -1)
2487 nc->ncpus = bnh.max_cpus;
2490 * len == 0 means the other end is sending us a new connection/dpp
2491 * len == 1 means that the other end signalled end-of-run
2493 dpp = nc_find_dpp(nc, &bnh);
2496 * Just adding in the dpp above is enough
2498 ack_open_close(nc->fd, dpp->buts_name);
2500 } else if (bnh.len == 1) {
2502 * overload cpu count with dropped events
2504 dpp->drops = bnh.cpu;
2506 ack_open_close(nc->fd, dpp->buts_name);
/* Last open device for this host closed: report stats, drop host. */
2507 if (--nc->ch->cl_opens == 0) {
2508 show_stats(&nc->ch->devpaths);
2509 net_ch_remove(nc->ch, nc->ncpus);
2513 net_client_read_data(nc, dpp, &bnh);
/*
 * handle_client_data() - walk the connection list servicing each fd
 * that polled readable; stops early if a host was closed (which
 * invalidates the pollfd indexing) or the event budget is exhausted.
 */
2518 static void handle_client_data(struct net_server_s *ns, int events)
2522 struct list_head *p, *q;
2525 list_for_each_safe(p, q, &ns->conn_list) {
2526 if (pfd->revents & POLLIN) {
2527 nc = list_entry(p, struct cl_conn, ns_head);
2529 if (net_client_data(nc) || --events == 0)
/*
 * net_setup_pfds() - rebuild the server's pollfd array before each
 * poll: slot 0 is the listen socket, followed by one slot per live
 * client connection.
 */
2536 static void net_setup_pfds(struct net_server_s *ns)
2539 struct list_head *p;
2541 ns->pfds[0].fd = ns->listen_fd;
2542 ns->pfds[0].events = POLLIN;
2545 __list_for_each(p, &ns->conn_list) {
2546 struct cl_conn *nc = list_entry(p, struct cl_conn, ns_head);
2549 pfd->events = POLLIN;
/*
 * net_server_handle_connections() - the server's main loop: poll the
 * listen socket plus all client connections with no timeout, accept
 * new connections, and dispatch readable client data.
 */
2554 static int net_server_handle_connections(struct net_server_s *ns)
2558 printf("server: waiting for connections...\n");
/* -1 timeout: block until something happens. */
2562 events = poll(ns->pfds, ns->connects + 1, -1);
2564 if (errno != EINTR) {
2565 perror("FATAL: poll error");
2568 } else if (events > 0) {
2569 if (ns->pfds[0].revents & POLLIN) {
2570 net_add_connection(ns);
2575 handle_client_data(ns, events);
/*
 * net_server() - set up the listening TCP socket (SO_REUSEADDR, bind to
 * INADDR_ANY:net_port, listen) and run the connection-handling loop.
 * Returns the loop's status after cleanup (elided below).
 */
2582 static int net_server(void)
2586 struct net_server_s net_server;
2587 struct net_server_s *ns = &net_server;
2589 memset(ns, 0, sizeof(*ns));
2590 INIT_LIST_HEAD(&ns->ch_list);
2591 INIT_LIST_HEAD(&ns->conn_list);
/* Initial single-slot pfds array: the listen socket only. */
2592 ns->pfds = malloc(sizeof(struct pollfd));
2594 fd = my_socket(AF_INET, SOCK_STREAM, 0);
2596 perror("server: socket");
2601 if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) < 0) {
2602 perror("setsockopt");
2606 memset(&ns->addr, 0, sizeof(ns->addr));
2607 ns->addr.sin_family = AF_INET;
2608 ns->addr.sin_addr.s_addr = htonl(INADDR_ANY);
2609 ns->addr.sin_port = htons(net_port);
2611 if (bind(fd, (struct sockaddr *) &ns->addr, sizeof(ns->addr)) < 0) {
2616 if (listen(fd, 1) < 0) {
2622 * The actual server looping is done here:
2625 ret = net_server_handle_connections(ns);
2628 * Clean up and return...
/*
 * run_tracers() - normal (non-server) run: register atexit cleanup,
 * configure the list-handling callback for piped vs. networked output,
 * start tracer threads, wait for completion, and emit final stats.
 */
2635 static int run_tracers(void)
2637 atexit(exit_tracing);
2638 if (net_mode == Net_client)
2639 printf("blktrace: connecting to %s\n", hostname);
2643 if (use_tracer_devpaths()) {
2644 if (setup_tracer_devpaths())
2648 handle_list = handle_list_file;
2650 handle_list = handle_list_net;
/* Only proceed to the run if every CPU's tracer actually started. */
2654 if (nthreads_running == ncpus) {
2657 if (net_mode == Net_client)
2658 printf("blktrace: connected!\n");
2665 if (nthreads_running == ncpus)
2666 show_stats(&devpaths);
2667 if (net_client_use_send())
2668 close_client_connections();
/*
 * get_online_cpus() - parse /sys/devices/system/cpu/online (which
 * lists ids and ranges, e.g. "0-3,5") to build a cpu_set_t of online
 * CPUs; also sets the global max_cpus to the highest id + 1.
 * Returns the allocated set (error returns are in elided lines).
 */
2674 static cpu_set_t *get_online_cpus(void)
2679 int cpuid, prevcpuid = -1;
2681 int n, ncpu, curcpu = 0;
2684 ncpu = sysconf(_SC_NPROCESSORS_CONF);
2688 cpu_nums = malloc(sizeof(int)*ncpu);
2695 * There is no way to easily get maximum CPU number. So we have to
2696 * parse the file first to find it out and then create appropriate
2699 cpus = my_fopen("/sys/devices/system/cpu/online", "r");
/* Read "<id><sep>": sep '-' starts a range, ',' separates entries. */
2701 n = fscanf(cpus, "%d%c", &cpuid, &nextch);
2704 if (n == 2 && nextch == '-') {
2708 if (prevcpuid == -1)
2710 while (prevcpuid <= cpuid) {
2711 /* More CPUs listed than configured? */
2712 if (curcpu >= ncpu) {
2716 cpu_nums[curcpu++] = prevcpuid++;
/* cpu_nums is ascending, so the last entry is the max id. */
2723 max_cpus = cpu_nums[ncpu - 1] + 1;
2725 /* Now that we have maximum cpu number, create a cpuset */
2726 set = CPU_ALLOC(max_cpus);
2731 alloc_size = CPU_ALLOC_SIZE(max_cpus);
2732 CPU_ZERO_S(alloc_size, set);
2734 for (curcpu = 0; curcpu < ncpu; curcpu++)
2735 CPU_SET_S(cpu_nums[curcpu], alloc_size, set);
2742 int main(int argc, char *argv[])
2746 setlocale(LC_NUMERIC, "en_US");
2747 pagesize = getpagesize();
2748 online_cpus = get_online_cpus();
2750 fprintf(stderr, "cannot get online cpus %d/%s\n",
2751 errno, strerror(errno));
2754 } else if (handle_args(argc, argv)) {
2759 ncpus = CPU_COUNT_S(CPU_ALLOC_SIZE(max_cpus), online_cpus);
2760 if (ndevs > 1 && output_name && strcmp(output_name, "-") != 0) {
2761 fprintf(stderr, "-o not supported with multiple devices\n");
2766 signal(SIGINT, handle_sigint);
2767 signal(SIGHUP, handle_sigint);
2768 signal(SIGTERM, handle_sigint);
2769 signal(SIGALRM, handle_sigint);
2770 signal(SIGPIPE, SIG_IGN);
2772 if (kill_running_trace) {
2773 struct devpath *dpp;
2774 struct list_head *p;
2776 __list_for_each(p, &devpaths) {
2777 dpp = list_entry(p, struct devpath, head);
2778 if (__stop_trace(dpp->fd)) {
2780 "BLKTRACETEARDOWN %s failed: %d/%s\n",
2781 dpp->path, errno, strerror(errno));
2784 } else if (net_mode == Net_server) {
2786 fprintf(stderr, "-o ignored in server mode\n");
2791 ret = run_tracers();