Invoke gethostbyname once, handle errors better
[blktrace.git] / blktrace.c
CommitLineData
d0ca268b
JA
1/*
2 * block queue tracing application
3 *
d956a2cd 4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
46e37c55 5 * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
d956a2cd 6 *
3fe0b570
AB
7 * Rewrite to have a single thread per CPU (managing all devices on that CPU)
8 * Alan D. Brunelle <alan.brunelle@hp.com> - January 2009
9 *
d956a2cd
JA
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 *
d0ca268b 24 */
3fe0b570
AB
25
26#include <errno.h>
27#include <stdarg.h>
28#include <stdio.h>
29#include <stdlib.h>
30#include <string.h>
31#include <fcntl.h>
32#include <getopt.h>
33#include <sched.h>
d0ca268b 34#include <unistd.h>
3fe0b570 35#include <poll.h>
d0ca268b 36#include <signal.h>
3fe0b570
AB
37#include <pthread.h>
38#include <locale.h>
d0ca268b 39#include <sys/ioctl.h>
3fe0b570
AB
40#include <sys/types.h>
41#include <sys/stat.h>
42#include <sys/vfs.h>
b7106311 43#include <sys/mman.h>
3fe0b570
AB
44#include <sys/param.h>
45#include <sys/time.h>
46#include <sys/resource.h>
8e86c98a 47#include <sys/socket.h>
8e86c98a
JA
48#include <netinet/in.h>
49#include <arpa/inet.h>
50#include <netdb.h>
32f18c48 51#include <sys/sendfile.h>
d0ca268b 52
3fe0b570 53#include "btt/list.h"
d0ca268b 54#include "blktrace.h"
52724a0e 55
8f551a39
JA
56/*
57 * You may want to increase this even more, if you are logging at a high
58 * rate and see skipped/missed events
59 */
3fe0b570
AB
60#define BUF_SIZE (512 * 1024)
61#define BUF_NR (4)
62
63#define FILE_VBUF_SIZE (128 * 1024)
64
65#define DEBUGFS_TYPE (0x64626720)
66#define TRACE_NET_PORT (8462)
67
/*
 * Network operating modes; net_mode holds one of these values.
 */
enum {
	Net_none	= 0,	/* no networking */
	Net_server	= 1,
	Net_client	= 2,
};
73
/*
 * Generic statistics collected per device and per CPU.  The number of
 * events can be _roughly_ estimated from data_read (discounting pdu...).
 *
 * The fields are updated through pdc_dr_update() and pdc_nev_update().
 */
struct pdc_stats {
	unsigned long long data_read;	/* accumulated by pdc_dr_update() */
	unsigned long long nevents;	/* accumulated by pdc_nev_update() */
};
84
85struct devpath {
86 struct list_head head;
87 char *path; /* path to device special file */
88 char *buts_name; /* name returned from bt kernel code */
89 struct pdc_stats *stats;
90 int fd, idx, ncpus;
91 unsigned long long drops;
92
93 /*
94 * For piped output only:
95 *
96 * Each tracer will have a tracer_devpath_head that it will add new
97 * data onto. It's list is protected above (tracer_devpath_head.mutex)
98 * and it will signal the processing thread using the dp_cond,
99 * dp_mutex & dp_entries variables above.
100 */
101 struct tracer_devpath_head *heads;
102
103 /*
104 * For network server mode only:
105 */
106 struct cl_host *ch;
107 u32 cl_id;
108 time_t cl_connect_time;
109 struct io_info *ios;
110};
111
112/*
113 * For piped output to stdout we will have each tracer thread (one per dev)
114 * tack buffers read from the relay queues on a per-device list.
115 *
116 * The main thread will then collect trace buffers from each of lists in turn.
117 *
118 * We will use a mutex to guard each of the trace_buf list. The tracers
119 * can then signal the main thread using <dp_cond,dp_mutex> and
120 * dp_entries. (When dp_entries is 0, and a tracer adds an entry it will
121 * signal. When dp_entries is 0, the main thread will wait for that condition
122 * to be signalled.)
123 *
124 * adb: It may be better just to have a large buffer per tracer per dev,
125 * and then use it as a ring-buffer. This would certainly cut down a lot
126 * of malloc/free thrashing, at the cost of more memory movements (potentially).
127 */
128struct trace_buf {
129 struct list_head head;
130 struct devpath *dpp;
131 void *buf;
132 int cpu, len;
133};
134
135struct tracer_devpath_head {
136 pthread_mutex_t mutex;
137 struct list_head head;
138 struct trace_buf *prev;
139};
140
/*
 * Bookkeeping for the mmap() interface to an output file (the file
 * that receives the traces).
 */
struct mmap_info {
	void *fs_buf;
	unsigned long long fs_size;
	unsigned long long fs_max_size;
	unsigned long long fs_off;
	unsigned long long fs_buf_len;
	unsigned long buf_size;		/* filled in by init_mmap_info() */
	unsigned long buf_nr;		/* filled in by init_mmap_info() */
	int pagesize;			/* filled in by init_mmap_info() */
};
150
151/*
152 * Each thread doing work on a (client) side of blktrace will have one
153 * of these. The ios array contains input/output information, pfds holds
154 * poll() data. The volatile's provide flags to/from the main executing
155 * thread.
156 */
157struct tracer {
158 struct list_head head;
159 struct io_info *ios;
160 struct pollfd *pfds;
161 pthread_t thread;
162 pthread_mutex_t mutex;
163 pthread_cond_t cond;
164 int cpu, nios;
165 volatile int running, status, is_done;
166};
167
168/*
169 * networking stuff follows. we include a magic number so we know whether
170 * to endianness convert or not.
171 *
172 * The len field is overloaded:
173 * 0 - Indicates an "open" - allowing the server to set up for a dev/cpu
174 * 1 - Indicates a "close" - Shut down connection orderly
175 *
176 * The cpu field is overloaded on close: it will contain the number of drops.
177 */
178struct blktrace_net_hdr {
179 u32 magic; /* same as trace magic */
180 char buts_name[32]; /* trace name */
181 u32 cpu; /* for which cpu */
182 u32 max_cpus;
183 u32 len; /* length of following trace data */
184 u32 cl_id; /* id for set of client per-cpu connections */
185 u32 buf_size; /* client buf_size for this trace */
186 u32 buf_nr; /* client buf_nr for this trace */
187 u32 page_size; /* client page_size for this trace */
188};
189
190/*
191 * Each host encountered has one of these. The head is used to link this
192 * on to the network server's ch_list. Connections associated with this
193 * host are linked on conn_list, and any devices traced on that host
194 * are connected on the devpaths list.
195 */
196struct cl_host {
197 struct list_head head;
198 struct list_head conn_list;
199 struct list_head devpaths;
200 struct net_server_s *ns;
201 char *hostname;
202 struct in_addr cl_in_addr;
203 int connects, ndevs, cl_opens;
204};
205
206/*
207 * Each connection (client to server socket ('fd')) has one of these. A
208 * back reference to the host ('ch'), and lists headers (for the host
209 * list, and the network server conn_list) are also included.
210 */
211struct cl_conn {
212 struct list_head ch_head, ns_head;
213 struct cl_host *ch;
214 int fd, ncpus;
215 time_t connect_time;
216};
217
218/*
219 * The network server requires some poll structures to be maintained -
220 * one per conection currently on conn_list. The nchs/ch_list values
221 * are for each host connected to this server. The addr field is used
222 * for scratch as new connections are established.
223 */
224struct net_server_s {
225 struct list_head conn_list;
226 struct list_head ch_list;
227 struct pollfd *pfds;
228 int listen_fd, connects, nchs;
229 struct sockaddr_in addr;
230};
231
232/*
233 * This structure is (generically) used to providide information
234 * for a read-to-write set of values.
235 *
236 * ifn & ifd represent input information
237 *
238 * ofn, ofd, ofp, obuf & mmap_info are used for output file (optionally).
239 */
240struct io_info {
241 struct devpath *dpp;
242 FILE *ofp;
243 char *obuf;
244 struct cl_conn *nc; /* Server network connection */
245
246 /*
247 * mmap controlled output files
248 */
249 struct mmap_info mmap_info;
250
251 /*
252 * Client network fields
253 */
254 unsigned int ready;
255 unsigned long long data_queued;
256
257 /*
258 * Input/output file descriptors & names
259 */
260 int ifd, ofd;
261 char ifn[MAXPATHLEN + 64];
262 char ofn[MAXPATHLEN + 64];
263};
264
265static char blktrace_version[] = "2.0.0";
266
267/*
268 * Linkage to blktrace helper routines (trace conversions)
269 */
270int data_is_native = -1;
271
272static int ncpus;
273static int pagesize;
274static int act_mask = ~0U;
275static char *debugfs_path = "/sys/kernel/debug";
276static char *output_name;
277static char *output_dir;
278static int kill_running_trace;
279static int stop_watch;
280static unsigned long buf_size = BUF_SIZE;
281static unsigned long buf_nr = BUF_NR;
282static LIST_HEAD(devpaths);
283static LIST_HEAD(tracers);
284static int ndevs;
285static volatile int done;
286static FILE *pfp;
287static int piped_output;
288static int ntracers;
d0ca268b 289
3fe0b570
AB
290static pthread_cond_t dp_cond = PTHREAD_COND_INITIALIZER;
291static pthread_mutex_t dp_mutex = PTHREAD_MUTEX_INITIALIZER;
292static volatile int dp_entries;
293
294/*
295 * network cmd line params
296 */
e58f3937 297static struct sockaddr_in hostname_addr;
3fe0b570
AB
298static char hostname[MAXHOSTNAMELEN];
299static int net_port = TRACE_NET_PORT;
300static int net_use_sendfile = 1;
301static int net_mode;
302static int *cl_fds;
007c233c 303
3fe0b570
AB
304static int (*handle_pfds)(struct tracer *, int, int);
305static int (*handle_list)(struct tracer_devpath_head *, struct list_head *);
e3e74029 306
5d4f19d9 307#define S_OPTS "d:a:A:r:o:kw:vVb:n:D:lh:p:sI:"
/*
 * Long option table; each .val is the matching short option character.
 * The array ends with an entry whose name is NULL.
 */
static struct option l_opts[] = {
	{ .name = "dev",             .has_arg = required_argument, .flag = NULL, .val = 'd' },
	{ .name = "input-devs",      .has_arg = required_argument, .flag = NULL, .val = 'I' },
	{ .name = "act-mask",        .has_arg = required_argument, .flag = NULL, .val = 'a' },
	{ .name = "set-mask",        .has_arg = required_argument, .flag = NULL, .val = 'A' },
	{ .name = "relay",           .has_arg = required_argument, .flag = NULL, .val = 'r' },
	{ .name = "output",          .has_arg = required_argument, .flag = NULL, .val = 'o' },
	{ .name = "kill",            .has_arg = no_argument,       .flag = NULL, .val = 'k' },
	{ .name = "stopwatch",       .has_arg = required_argument, .flag = NULL, .val = 'w' },
	{ .name = "version",         .has_arg = no_argument,       .flag = NULL, .val = 'v' },
	{ .name = "version",         .has_arg = no_argument,       .flag = NULL, .val = 'V' },
	{ .name = "buffer-size",     .has_arg = required_argument, .flag = NULL, .val = 'b' },
	{ .name = "num-sub-buffers", .has_arg = required_argument, .flag = NULL, .val = 'n' },
	{ .name = "output-dir",      .has_arg = required_argument, .flag = NULL, .val = 'D' },
	{ .name = "listen",          .has_arg = no_argument,       .flag = NULL, .val = 'l' },
	{ .name = "host",            .has_arg = required_argument, .flag = NULL, .val = 'h' },
	{ .name = "port",            .has_arg = required_argument, .flag = NULL, .val = 'p' },
	{ .name = "no-sendfile",     .has_arg = no_argument,       .flag = NULL, .val = 's' },
	{ .name = NULL, }
};
415
3fe0b570
AB
/*
 * Option summary printed by show_usage(); adjacent literals are
 * concatenated by the compiler.
 */
static char usage_str[] =
	"-d <dev> [ -r debugfs path ] [ -o <output> ] [-k ] [ -w time ]\n"
	"[ -a action ] [ -A action mask ] [ -I <devs file> ] [ -v ]\n\n"
	"\t-d Use specified device. May also be given last after options\n"
	"\t-r Path to mounted debugfs, defaults to /sys/kernel/debug\n"
	"\t-o File(s) to send output to\n"
	"\t-D Directory to prepend to output file names\n"
	"\t-k Kill a running trace\n"
	"\t-w Stop after defined time, in seconds\n"
	"\t-a Only trace specified actions. See documentation\n"
	"\t-A Give trace mask as a single value. See documentation\n"
	"\t-b Sub buffer size in KiB\n"
	"\t-n Number of sub buffers\n"
	"\t-l Run in network listen mode (blktrace server)\n"
	"\t-h Run in network client mode, connecting to the given host\n"
	"\t-p Network port to use (default 8462)\n"
	"\t-s Make the network client NOT use sendfile() to transfer data\n"
	"\t-I Add devices found in <devs file>\n"
	"\t-V Print program version info\n\n";
9db17354 435
3fe0b570
AB
/*
 * Zero both the requested (events) and the returned (revents) masks of
 * a pollfd; the fd member is left alone.
 */
static void clear_events(struct pollfd *pfd)
{
	pfd->revents = 0;
	pfd->events = 0;
}
21f55651 441
3fe0b570
AB
442static inline int net_client_use_sendfile(void)
443{
444 return net_mode == Net_client && net_use_sendfile;
445}
21f55651 446
3fe0b570
AB
447static inline int net_client_use_send(void)
448{
449 return net_mode == Net_client && !net_use_sendfile;
450}
b9d4294e 451
3fe0b570
AB
452static inline int use_tracer_devpaths(void)
453{
454 return piped_output || net_client_use_send();
455}
b9d4294e 456
3fe0b570
AB
/*
 * Returns 1 when both addresses hold the same s_addr value, 0 otherwise.
 */
static inline int in_addr_eq(struct in_addr a, struct in_addr b)
{
	return (a.s_addr ^ b.s_addr) == 0;
}
007c233c 461
3fe0b570
AB
462static inline void pdc_dr_update(struct devpath *dpp, int cpu, int data_read)
463{
464 dpp->stats[cpu].data_read += data_read;
465}
0cc7d25e 466
3fe0b570
AB
467static inline void pdc_nev_update(struct devpath *dpp, int cpu, int nevents)
468{
469 dpp->stats[cpu].nevents += nevents;
470}
9db17354 471
3fe0b570
AB
472static void show_usage(char *prog)
473{
474 fprintf(stderr, "Usage: %s %s %s", prog, blktrace_version, usage_str);
475}
9db17354 476
3fe0b570
AB
477static void init_mmap_info(struct mmap_info *mip)
478{
479 mip->buf_size = buf_size;
480 mip->buf_nr = buf_nr;
481 mip->pagesize = pagesize;
482}
b7106311 483
3fe0b570
AB
/*
 * Shut down both directions of the socket in *fd, close it, and mark
 * the slot as unused by storing -1.
 */
static void net_close_connection(int *fd)
{
	const int sock = *fd;

	shutdown(sock, SHUT_RDWR);
	close(sock);
	*fd = -1;
}
ff11d54c 490
3fe0b570
AB
491static void dpp_free(struct devpath *dpp)
492{
493 if (dpp->stats)
494 free(dpp->stats);
495 if (dpp->ios)
496 free(dpp->ios);
497 if (dpp->path)
498 free(dpp->path);
499 if (dpp->buts_name)
500 free(dpp->buts_name);
501 free(dpp);
502}
d0ca268b 503
3fe0b570
AB
/*
 * Pin the calling thread to the given CPU.
 *
 * A pid of 0 makes sched_setaffinity() act on the calling thread.
 * Passing getpid() instead would address the thread whose id equals the
 * process id (the initial thread), which is not what a per-CPU tracer
 * thread wants.
 *
 * Returns 0 on success, or the errno value on failure.
 */
static int lock_on_cpu(int cpu)
{
	cpu_set_t cpu_mask;

	CPU_ZERO(&cpu_mask);
	CPU_SET(cpu, &cpu_mask);
	if (sched_setaffinity(0, sizeof(cpu_mask), &cpu_mask) < 0)
		return errno;

	return 0;
}
e7c9f3ff 515
3fe0b570
AB
/*
 * Create a timespec 'delta_msec' milliseconds into the future.
 *
 * The delta is split into whole seconds and a sub-second remainder
 * before being scaled, so the nanosecond arithmetic stays below 2e9 and
 * cannot overflow a 32-bit long. The result is normalized so that
 * 0 <= tv_nsec < 1000000000 (a value of exactly one second is carried
 * into tv_sec as well).
 */
static inline void make_timespec(struct timespec *tsp, long delta_msec)
{
	struct timeval now;

	gettimeofday(&now, NULL);

	tsp->tv_sec = now.tv_sec + delta_msec / 1000;
	tsp->tv_nsec = 1000L * now.tv_usec + (delta_msec % 1000) * 1000000L;

	if (tsp->tv_nsec >= 1000000000L) {
		tsp->tv_sec++;
		tsp->tv_nsec -= 1000000000L;
	} else if (tsp->tv_nsec < 0) {
		/* only reachable with a negative delta */
		tsp->tv_sec--;
		tsp->tv_nsec += 1000000000L;
	}
}
a3e4d330 535
3fe0b570
AB
/*
 * Try to raise the soft limit of 'resource' by 'increase'. If the new
 * soft limit reaches the hard limit, the hard limit is raised too.
 *
 * Returns 1 when the new limits were installed. Otherwise returns 0 and
 * restores errno to the value it had on entry.
 */
static int increase_limit(int resource, rlim_t increase)
{
	const int saved_errno = errno;
	struct rlimit lim;

	if (getrlimit(resource, &lim) == 0) {
		lim.rlim_cur += increase;
		if (lim.rlim_cur >= lim.rlim_max)
			lim.rlim_max = lim.rlim_cur + increase;

		if (setrlimit(resource, &lim) == 0)
			return 1;
	}

	errno = saved_errno;
	return 0;
}
e0a1988b 553
3fe0b570
AB
/*
 * Called after a failed open-like call. When errno reports that the
 * file table is full (ENFILE/EMFILE), try to raise RLIMIT_NOFILE by 16.
 *
 * Returns non-zero when the limit was raised, 0 otherwise.
 */
static int handle_open_failure(void)
{
	switch (errno) {
	case ENFILE:
	case EMFILE:
		return increase_limit(RLIMIT_NOFILE, 16);
	default:
		return 0;
	}
}
99c1f5ab 560
3fe0b570
AB
/*
 * Called after a failed memory mapping/locking call of 'length' bytes.
 * ENFILE is passed on to handle_open_failure(); ENOMEM triggers an
 * attempt to raise RLIMIT_MEMLOCK by twice the requested length.
 *
 * Returns non-zero when a limit was raised, 0 otherwise.
 */
static int handle_mem_failure(size_t length)
{
	switch (errno) {
	case ENFILE:
		return handle_open_failure();
	case ENOMEM:
		return increase_limit(RLIMIT_MEMLOCK, 2 * length);
	default:
		return 0;
	}
}
99c1f5ab 569
3fe0b570
AB
/*
 * fopen() wrapper that retries for as long as handle_open_failure()
 * reports it was able to raise the descriptor limit.
 */
static FILE *my_fopen(const char *path, const char *mode)
{
	for (;;) {
		FILE *fp = fopen(path, mode);

		if (fp != NULL || !handle_open_failure())
			return fp;
	}
}
8e86c98a 580
3fe0b570
AB
/*
 * open() wrapper that retries for as long as handle_open_failure()
 * reports it was able to raise the descriptor limit.
 */
static int my_open(const char *path, int flags)
{
	for (;;) {
		int fd = open(path, flags);

		if (fd >= 0 || !handle_open_failure())
			return fd;
	}
}
ff11d54c 591
3fe0b570
AB
/*
 * socket() wrapper that retries for as long as handle_open_failure()
 * reports it was able to raise the descriptor limit.
 */
static int my_socket(int domain, int type, int protocol)
{
	for (;;) {
		int fd = socket(domain, type, protocol);

		if (fd >= 0 || !handle_open_failure())
			return fd;
	}
}
602
d5302b03
AB
/*
 * accept() wrapper that retries for as long as handle_open_failure()
 * reports it was able to raise the descriptor limit.
 */
static int my_accept(int sockfd, struct sockaddr *addr, socklen_t *addrlen)
{
	for (;;) {
		int fd = accept(sockfd, addr, addrlen);

		if (fd >= 0 || !handle_open_failure())
			return fd;
	}
}
613
3fe0b570
AB
/*
 * mmap() wrapper that retries for as long as handle_mem_failure()
 * reports it was able to raise a resource limit. Returns whatever
 * mmap() returned last (MAP_FAILED on failure).
 */
static void *my_mmap(void *addr, size_t length, int prot, int flags, int fd,
		     off_t offset)
{
	for (;;) {
		void *map = mmap(addr, length, prot, flags, fd, offset);

		if (map != MAP_FAILED || !handle_mem_failure(length))
			return map;
	}
}
625
/*
 * mlock() wrapper that retries for as long as handle_mem_failure()
 * reports it was able to raise a resource limit.
 */
static int my_mlock(const void *addr, size_t len)
{
	for (;;) {
		int rc = mlock(addr, len);

		if (rc >= 0 || !handle_mem_failure(len))
			return rc;
	}
}
636
637static int __stop_trace(int fd)
638{
639 /*
640 * Should be stopped, don't complain if it isn't
641 */
642 ioctl(fd, BLKTRACESTOP);
643 return ioctl(fd, BLKTRACETEARDOWN);
644}
645
646static int write_data(char *buf, int len)
647{
648 int ret;
649
650rewrite:
651 ret = fwrite(buf, len, 1, pfp);
652 if (ferror(pfp) || ret != 1) {
653 if (errno == EINTR) {
654 clearerr(pfp);
655 goto rewrite;
656 }
657
658 if (!piped_output || (errno != EPIPE && errno != EBADF)) {
659 fprintf(stderr, "write(%d) failed: %d/%s\n",
660 len, errno, strerror(errno));
69dd57c2 661 }
3fe0b570 662 goto err;
69dd57c2
AB
663 }
664
3fe0b570 665 fflush(pfp);
69dd57c2 666 return 0;
3fe0b570
AB
667
668err:
669 clearerr(pfp);
670 return 1;
69dd57c2
AB
671}
672
673/*
3fe0b570 674 * Returns the number of bytes read (successfully)
69dd57c2 675 */
3fe0b570 676static int __net_recv_data(int fd, void *buf, unsigned int len)
69dd57c2 677{
3fe0b570
AB
678 unsigned int bytes_left = len;
679
680 while (bytes_left && !done) {
681 int ret = recv(fd, buf, bytes_left, MSG_WAITALL);
682
683 if (ret == 0)
684 break;
685 else if (ret < 0) {
686 if (errno != EAGAIN) {
687 perror("server: net_recv_data: recv failed");
688 break;
689 } else
690 break;
691 } else {
692 buf += ret;
693 bytes_left -= ret;
694 }
695 }
69dd57c2 696
3fe0b570 697 return len - bytes_left;
69dd57c2
AB
698}
699
/*
 * Thin wrapper around __net_recv_data(); returns the number of bytes
 * read.
 */
static int net_recv_data(int fd, void *buf, unsigned int len)
{
	int nread = __net_recv_data(fd, buf, len);

	return nread;
}
7035d92d 704
3fe0b570
AB
/*
 * Send 'buf_len' bytes from 'buf' on 'fd', calling send() repeatedly
 * until everything is out or send() fails (reported with perror()).
 *
 * Returns number of bytes written.
 */
static int net_send_data(int fd, void *buf, unsigned int buf_len)
{
	const char *src = buf;
	unsigned int remaining;

	for (remaining = buf_len; remaining != 0; ) {
		int nsent = send(fd, src, remaining, 0);

		if (nsent < 0) {
			perror("send");
			break;
		}

		src += nsent;
		remaining -= nsent;
	}

	return buf_len - remaining;
}
726
3fe0b570 727static int net_send_header(int fd, int cpu, char *buts_name, int len)
eb3c8108 728{
3fe0b570 729 struct blktrace_net_hdr hdr;
eb3c8108 730
3fe0b570 731 memset(&hdr, 0, sizeof(hdr));
eb3c8108 732
3fe0b570
AB
733 hdr.magic = BLK_IO_TRACE_MAGIC;
734 strncpy(hdr.buts_name, buts_name, sizeof(hdr.buts_name));
735 hdr.buts_name[sizeof(hdr.buts_name)-1] = '\0';
736 hdr.cpu = cpu;
737 hdr.max_cpus = ncpus;
738 hdr.len = len;
739 hdr.cl_id = getpid();
740 hdr.buf_size = buf_size;
741 hdr.buf_nr = buf_nr;
742 hdr.page_size = pagesize;
eb3c8108 743
3fe0b570
AB
744 return net_send_data(fd, &hdr, sizeof(hdr)) != sizeof(hdr);
745}
eb3c8108 746
3fe0b570
AB
747static void net_send_open_close(int fd, int cpu, char *buts_name, int len)
748{
749 struct blktrace_net_hdr ret_hdr;
eb3c8108 750
3fe0b570
AB
751 net_send_header(fd, cpu, buts_name, len);
752 net_recv_data(fd, &ret_hdr, sizeof(ret_hdr));
753}
eb3c8108 754
3fe0b570
AB
/*
 * Send an "open" (header length 0) for the given cpu and trace name.
 */
static void net_send_open(int fd, int cpu, char *buts_name)
{
	const int open_len = 0;

	net_send_open_close(fd, cpu, buts_name, open_len);
}
759
3fe0b570 760static void net_send_close(int fd, char *buts_name, int drops)
d0ca268b 761{
3fe0b570
AB
762 /*
763 * Overload CPU w/ number of drops
764 *
765 * XXX: Need to clear/set done around call - done=1 (which
766 * is true here) stops reads from happening... :-(
767 */
768 done = 0;
769 net_send_open_close(fd, drops, buts_name, 1);
770 done = 1;
771}
d0ca268b 772
3fe0b570
AB
/*
 * Send an acknowledgement header (cpu 0, length 2) for the given trace
 * name.
 */
static void ack_open_close(int fd, char *buts_name)
{
	const int ack_len = 2;

	net_send_header(fd, 0, buts_name, ack_len);
}
d0ca268b 777
3fe0b570
AB
778static void net_send_drops(int fd)
779{
780 struct list_head *p;
ed71a31e 781
3fe0b570
AB
782 __list_for_each(p, &devpaths) {
783 struct devpath *dpp = list_entry(p, struct devpath, head);
784
785 net_send_close(fd, dpp->buts_name, dpp->drops);
d0ca268b 786 }
3fe0b570 787}
d0ca268b 788
3fe0b570
AB
789/*
790 * Returns:
791 * 0: "EOF"
792 * 1: OK
793 * -1: Error
794 */
795static int net_get_header(struct cl_conn *nc, struct blktrace_net_hdr *bnh)
796{
797 int bytes_read;
798 int fl = fcntl(nc->fd, F_GETFL);
799
800 fcntl(nc->fd, F_SETFL, fl | O_NONBLOCK);
801 bytes_read = __net_recv_data(nc->fd, bnh, sizeof(*bnh));
802 fcntl(nc->fd, F_SETFL, fl & ~O_NONBLOCK);
803
804 if (bytes_read == sizeof(*bnh))
805 return 1;
806 else if (bytes_read == 0)
807 return 0;
808 return -1;
d0ca268b
JA
809}
810
e58f3937 811static int net_setup_addr(void)
d0ca268b 812{
e58f3937 813 struct sockaddr_in *addr = &hostname_addr;
cf9208ea 814
e58f3937
AB
815 memset(addr, 0, sizeof(*addr));
816 addr->sin_family = AF_INET;
817 addr->sin_port = htons(net_port);
3fe0b570 818
e58f3937
AB
819 if (inet_aton(hostname, &addr->sin_addr) != 1) {
820 struct hostent *hent;
821retry:
822 hent = gethostbyname(hostname);
3fe0b570 823 if (!hent) {
e58f3937
AB
824 if (h_errno == TRY_AGAIN) {
825 usleep(100);
826 goto retry;
827 } else if (h_errno == NO_RECOVERY) {
828 fprintf(stderr, "gethostbyname(%s)"
829 "non-recoverable error encountered\n",
830 hostname);
831 } else {
832 /*
833 * HOST_NOT_FOUND, NO_ADDRESS or NO_DATA
834 */
835 fprintf(stderr, "Host %s not found\n",
836 hostname);
837 }
3fe0b570
AB
838 return 1;
839 }
840
e58f3937 841 memcpy(&addr->sin_addr, hent->h_addr, 4);
3fe0b570
AB
842 strcpy(hostname, hent->h_name);
843 }
7035d92d 844
e58f3937
AB
845 return 0;
846}
847
848static int net_setup_client(void)
849{
850 int fd;
851 struct sockaddr_in *addr = &hostname_addr;
852
3fe0b570
AB
853 fd = my_socket(AF_INET, SOCK_STREAM, 0);
854 if (fd < 0) {
855 perror("client: socket");
856 return -1;
857 }
cf9208ea 858
e58f3937 859 if (connect(fd, (struct sockaddr *)addr, sizeof(*addr)) < 0) {
3fe0b570
AB
860 if (errno == ECONNREFUSED)
861 fprintf(stderr,
862 "\nclient: Connection to %s refused, "
863 "perhaps the server is not started?\n\n",
864 hostname);
865 else
866 perror("client: connect");
867 close(fd);
868 return -1;
707b0914 869 }
3fe0b570
AB
870
871 return fd;
d0ca268b
JA
872}
873
3fe0b570 874static int open_client_connections(void)
e7c9f3ff 875{
3fe0b570 876 int cpu;
e7c9f3ff 877
3fe0b570
AB
878 cl_fds = calloc(ncpus, sizeof(*cl_fds));
879 for (cpu = 0; cpu < ncpus; cpu++) {
880 cl_fds[cpu] = net_setup_client();
881 if (cl_fds[cpu] < 0)
882 goto err;
eb3c8108 883 }
3fe0b570
AB
884 return 0;
885
886err:
887 while (cpu > 0)
888 close(cl_fds[cpu--]);
889 free(cl_fds);
890 return 1;
e7c9f3ff
NS
891}
892
3fe0b570 893static void close_client_connections(void)
eb3c8108 894{
3fe0b570
AB
895 if (cl_fds) {
896 int cpu, *fdp;
eb3c8108 897
3fe0b570
AB
898 for (cpu = 0, fdp = cl_fds; cpu < ncpus; cpu++, fdp++) {
899 if (*fdp >= 0) {
900 net_send_drops(*fdp);
901 net_close_connection(fdp);
902 }
7934e668 903 }
3fe0b570 904 free(cl_fds);
ff11d54c 905 }
eb3c8108
JA
906}
907
3fe0b570 908static void setup_buts(void)
d0ca268b 909{
3fe0b570 910 struct list_head *p;
bbabf03a 911
3fe0b570
AB
912 __list_for_each(p, &devpaths) {
913 struct blk_user_trace_setup buts;
914 struct devpath *dpp = list_entry(p, struct devpath, head);
ae9f71b3 915
3fe0b570
AB
916 memset(&buts, 0, sizeof(buts));
917 buts.buf_size = buf_size;
918 buts.buf_nr = buf_nr;
919 buts.act_mask = act_mask;
920
921 if (ioctl(dpp->fd, BLKTRACESETUP, &buts) < 0) {
922 fprintf(stderr, "BLKTRACESETUP(2) %s failed: %d/%s\n",
923 dpp->path, errno, strerror(errno));
9db17354 924 continue;
3fe0b570
AB
925 } else if (ioctl(dpp->fd, BLKTRACESTART) < 0) {
926 fprintf(stderr, "BLKTRACESTART %s failed: %d/%s\n",
927 dpp->path, errno, strerror(errno));
9db17354 928 continue;
bbabf03a 929 }
8e86c98a 930
3fe0b570
AB
931 dpp->ncpus = ncpus;
932 dpp->buts_name = strdup(buts.name);
933 if (dpp->stats)
934 free(dpp->stats);
935 dpp->stats = calloc(dpp->ncpus, sizeof(*dpp->stats));
936 memset(dpp->stats, 0, dpp->ncpus * sizeof(*dpp->stats));
937 }
8a43bac5
JA
938}
939
3fe0b570 940static int get_drops(struct devpath *dpp)
8e86c98a 941{
3fe0b570
AB
942 int fd, drops = 0;
943 char fn[MAXPATHLEN + 64], tmp[256];
8e86c98a 944
3fe0b570
AB
945 snprintf(fn, sizeof(fn), "%s/block/%s/dropped", debugfs_path,
946 dpp->buts_name);
8e86c98a 947
3fe0b570
AB
948 fd = my_open(fn, O_RDONLY);
949 if (fd < 0) {
950 /*
951 * This may be ok: the kernel may not support
952 * dropped counts.
953 */
954 if (errno != ENOENT)
955 fprintf(stderr, "Could not open %s: %d/%s\n",
956 fn, errno, strerror(errno));
957 return 0;
958 } else if (read(fd, tmp, sizeof(tmp)) < 0) {
959 fprintf(stderr, "Could not read %s: %d/%s\n",
960 fn, errno, strerror(errno));
961 } else
962 drops = atoi(tmp);
963 close(fd);
8e86c98a 964
3fe0b570 965 return drops;
8e86c98a
JA
966}
967
3fe0b570 968static void get_all_drops(void)
a3e4d330 969{
3fe0b570 970 struct list_head *p;
21f55651 971
3fe0b570
AB
972 __list_for_each(p, &devpaths) {
973 struct devpath *dpp = list_entry(p, struct devpath, head);
974 dpp->drops = get_drops(dpp);
21f55651 975 }
9db17354 976}
eb3c8108 977
3fe0b570 978static inline struct trace_buf *alloc_trace_buf(int cpu, int bufsize)
9db17354 979{
3fe0b570 980 struct trace_buf *tbp;
21f55651 981
3fe0b570
AB
982 tbp = malloc(sizeof(*tbp) + bufsize);
983 INIT_LIST_HEAD(&tbp->head);
984 tbp->len = 0;
985 tbp->buf = (void *)(tbp + 1);
986 tbp->cpu = cpu;
987 tbp->dpp = NULL; /* Will be set when tbp is added */
21f55651 988
3fe0b570 989 return tbp;
a3e4d330
JA
990}
991
3fe0b570 992static void free_tracer_heads(struct devpath *dpp)
b7106311 993{
3fe0b570
AB
994 int cpu;
995 struct tracer_devpath_head *hd;
b7106311 996
3fe0b570
AB
997 for (cpu = 0, hd = dpp->heads; cpu < ncpus; cpu++, hd++) {
998 if (hd->prev)
999 free(hd->prev);
1000 pthread_mutex_destroy(&hd->mutex);
1001 }
1002 free(dpp->heads);
1003}
b7106311 1004
3fe0b570
AB
1005static int setup_tracer_devpaths(void)
1006{
1007 struct list_head *p;
b7106311 1008
3fe0b570
AB
1009 if (net_client_use_send())
1010 if (open_client_connections())
1011 return 1;
b7106311 1012
3fe0b570
AB
1013 __list_for_each(p, &devpaths) {
1014 int cpu;
1015 struct tracer_devpath_head *hd;
1016 struct devpath *dpp = list_entry(p, struct devpath, head);
b7106311 1017
3fe0b570
AB
1018 dpp->heads = calloc(ncpus, sizeof(struct tracer_devpath_head));
1019 for (cpu = 0, hd = dpp->heads; cpu < ncpus; cpu++, hd++) {
1020 INIT_LIST_HEAD(&hd->head);
1021 pthread_mutex_init(&hd->mutex, NULL);
1022 hd->prev = NULL;
1023 }
b7106311
JA
1024 }
1025
3fe0b570 1026 return 0;
b7106311
JA
1027}
1028
3fe0b570
AB
1029static inline void add_trace_buf(struct devpath *dpp, int cpu,
1030 struct trace_buf **tbpp)
18eed2a7 1031{
3fe0b570
AB
1032 struct trace_buf *tbp = *tbpp;
1033 struct tracer_devpath_head *hd = &dpp->heads[cpu];
18eed2a7 1034
3fe0b570 1035 tbp->dpp = dpp;
2f064793 1036
3fe0b570
AB
1037 pthread_mutex_lock(&hd->mutex);
1038 list_add_tail(&tbp->head, &hd->head);
1039 pthread_mutex_unlock(&hd->mutex);
18eed2a7 1040
3fe0b570 1041 *tbpp = alloc_trace_buf(cpu, buf_size);
18eed2a7
JA
1042}
1043
3fe0b570 1044static inline void incr_entries(int entries_handled)
a3e4d330 1045{
3fe0b570
AB
1046 pthread_mutex_lock(&dp_mutex);
1047 if (dp_entries == 0)
1048 pthread_cond_signal(&dp_cond);
1049 dp_entries += entries_handled;
1050 pthread_mutex_unlock(&dp_mutex);
a3e4d330
JA
1051}
1052
3fe0b570 1053static int add_devpath(char *path)
8e86c98a 1054{
3fe0b570
AB
1055 int fd;
1056 struct devpath *dpp;
1057
8e86c98a 1058 /*
3fe0b570 1059 * Verify device is valid before going too far
8e86c98a 1060 */
3fe0b570
AB
1061 fd = my_open(path, O_RDONLY | O_NONBLOCK);
1062 if (fd < 0) {
1063 fprintf(stderr, "Invalid path %s specified: %d/%s\n",
1064 path, errno, strerror(errno));
1065 return 1;
1066 }
8e86c98a 1067
3fe0b570
AB
1068 dpp = malloc(sizeof(*dpp));
1069 memset(dpp, 0, sizeof(*dpp));
1070 dpp->path = strdup(path);
1071 dpp->fd = fd;
1072 dpp->idx = ndevs++;
1073 list_add_tail(&dpp->head, &devpaths);
8e86c98a 1074
3fe0b570 1075 return 0;
8e86c98a
JA
1076}
1077
3fe0b570 1078static void rel_devpaths(void)
a3e4d330 1079{
3fe0b570 1080 struct list_head *p, *q;
a3e4d330 1081
3fe0b570
AB
1082 list_for_each_safe(p, q, &devpaths) {
1083 struct devpath *dpp = list_entry(p, struct devpath, head);
a3e4d330 1084
3fe0b570
AB
1085 list_del(&dpp->head);
1086 __stop_trace(dpp->fd);
1087 close(dpp->fd);
a3e4d330 1088
3fe0b570
AB
1089 if (dpp->heads)
1090 free_tracer_heads(dpp);
a3e4d330 1091
3fe0b570
AB
1092 dpp_free(dpp);
1093 ndevs--;
b7106311 1094 }
8e86c98a 1095}
b7106311 1096
3fe0b570 1097static int flush_subbuf_net(struct trace_buf *tbp)
8e86c98a 1098{
3fe0b570
AB
1099 int fd = cl_fds[tbp->cpu];
1100 struct devpath *dpp = tbp->dpp;
b7106311 1101
3fe0b570
AB
1102 if (net_send_header(fd, tbp->cpu, dpp->buts_name, tbp->len))
1103 return 1;
8e86c98a 1104
3fe0b570
AB
1105 if (net_send_data(fd, tbp->buf, tbp->len) != tbp->len)
1106 return 1;
a3e4d330 1107
8e86c98a 1108 return 0;
a3e4d330
JA
1109}
1110
3fe0b570
AB
1111static int
1112handle_list_net(__attribute__((__unused__))struct tracer_devpath_head *hd,
1113 struct list_head *list)
8e86c98a 1114{
3fe0b570
AB
1115 struct trace_buf *tbp;
1116 struct list_head *p, *q;
1117 int entries_handled = 0;
8e86c98a 1118
3fe0b570
AB
1119 list_for_each_safe(p, q, list) {
1120 tbp = list_entry(p, struct trace_buf, head);
8e86c98a 1121
3fe0b570
AB
1122 list_del(&tbp->head);
1123 entries_handled++;
6a752c90 1124
3fe0b570
AB
1125 if (cl_fds[tbp->cpu] >= 0) {
1126 if (flush_subbuf_net(tbp)) {
1127 close(cl_fds[tbp->cpu]);
1128 cl_fds[tbp->cpu] = -1;
1129 }
1130 }
7ab2f837 1131
3fe0b570 1132 free(tbp);
7934e668
JA
1133 }
1134
3fe0b570 1135 return entries_handled;
6a752c90
JA
1136}
1137
3fe0b570
AB
1138static int handle_list_file(struct tracer_devpath_head *hd,
1139 struct list_head *list)
f6fead25 1140{
3fe0b570
AB
1141 int off, t_len, nevents;
1142 struct blk_io_trace *t;
1143 struct list_head *p, *q;
1144 int entries_handled = 0;
1145 struct trace_buf *tbp, *prev;
11629347 1146
3fe0b570
AB
1147 prev = hd->prev;
1148 list_for_each_safe(p, q, list) {
1149 tbp = list_entry(p, struct trace_buf, head);
1150 list_del(&tbp->head);
1151 entries_handled++;
18eed2a7 1152
3fe0b570
AB
1153 /*
1154 * If there was some leftover before, tack this new
1155 * entry onto the tail of the previous one.
1156 */
1157 if (prev) {
1158 unsigned long tot_len;
1159 struct trace_buf *tmp = tbp;
1160
1161 tbp = prev;
1162 prev = NULL;
1163
1164 tot_len = tbp->len + tmp->len;
1165 if (tot_len > buf_size) {
1166 /*
1167 * tbp->head isn't connected (it was 'prev'
1168 * so it had been taken off of the list
1169 * before). Therefore, we can realloc
1170 * the whole structures, as the other fields
1171 * are "static".
1172 */
1173 tbp = realloc(tbp->buf, sizeof(*tbp) + tot_len);
1174 tbp->buf = (void *)(tbp + 1);
1175 }
32f18c48 1176
3fe0b570
AB
1177 memcpy(tbp->buf + tbp->len, tmp->buf, tmp->len);
1178 tbp->len = tot_len;
32f18c48 1179
3fe0b570
AB
1180 free(tmp);
1181 }
ff11d54c 1182
3fe0b570
AB
1183 /*
1184 * See how many whole traces there are - send them
1185 * all out in one go.
1186 */
1187 off = 0;
1188 nevents = 0;
1189 while (off + (int)sizeof(*t) <= tbp->len) {
1190 t = (struct blk_io_trace *)(tbp->buf + off);
1191 t_len = sizeof(*t) + t->pdu_len;
1192 if (off + t_len > tbp->len)
1193 break;
ff11d54c 1194
3fe0b570
AB
1195 off += t_len;
1196 nevents++;
1197 }
1198 if (nevents)
1199 pdc_nev_update(tbp->dpp, tbp->cpu, nevents);
4aeec019 1200
3fe0b570
AB
1201 /*
1202 * Write any full set of traces, any remaining data is kept
1203 * for the next pass.
1204 */
1205 if (off) {
1206 if (write_data(tbp->buf, off) || off == tbp->len)
1207 free(tbp);
1208 else {
1209 /*
1210 * Move valid data to beginning of buffer
1211 */
1212 tbp->len -= off;
1213 memmove(tbp->buf, tbp->buf + off, tbp->len);
1214 prev = tbp;
1215 }
1216 } else
1217 prev = tbp;
ff11d54c 1218 }
3fe0b570 1219 hd->prev = prev;
ff11d54c 1220
3fe0b570 1221 return entries_handled;
ff11d54c
TZ
1222}
1223
3fe0b570 1224static void __process_trace_bufs(void)
8a43bac5 1225{
3fe0b570
AB
1226 int cpu;
1227 struct list_head *p;
1228 struct list_head list;
1229 int handled = 0;
1230
1231 __list_for_each(p, &devpaths) {
1232 struct devpath *dpp = list_entry(p, struct devpath, head);
1233 struct tracer_devpath_head *hd = dpp->heads;
1234
1235 for (cpu = 0; cpu < ncpus; cpu++, hd++) {
1236 pthread_mutex_lock(&hd->mutex);
1237 if (list_empty(&hd->head)) {
1238 pthread_mutex_unlock(&hd->mutex);
1239 continue;
1240 }
8a43bac5 1241
3fe0b570
AB
1242 list_replace_init(&hd->head, &list);
1243 pthread_mutex_unlock(&hd->mutex);
6480258a 1244
3fe0b570
AB
1245 handled += handle_list(hd, &list);
1246 }
d0ca268b
JA
1247 }
1248
3fe0b570
AB
1249 if (handled) {
1250 pthread_mutex_lock(&dp_mutex);
1251 dp_entries -= handled;
1252 pthread_mutex_unlock(&dp_mutex);
1253 }
8a43bac5
JA
1254}
1255
3fe0b570 1256static void process_trace_bufs(void)
8a43bac5 1257{
3fe0b570
AB
1258 while (!done) {
1259 pthread_mutex_lock(&dp_mutex);
1260 while (!done && dp_entries == 0) {
1261 struct timespec ts;
d0ca268b 1262
3fe0b570
AB
1263 make_timespec(&ts, 50);
1264 pthread_cond_timedwait(&dp_cond, &dp_mutex, &ts);
9cfa6c2b 1265 }
3fe0b570 1266 pthread_mutex_unlock(&dp_mutex);
3a9d6c13 1267
3fe0b570 1268 __process_trace_bufs();
3a9d6c13 1269 }
3fe0b570 1270}
3a9d6c13 1271
3fe0b570
AB
1272static void clean_trace_bufs(void)
1273{
3a9d6c13 1274 /*
3fe0b570
AB
1275 * No mutex needed here: we're only reading from the lists,
1276 * tracers are done
3a9d6c13 1277 */
3fe0b570
AB
1278 while (dp_entries)
1279 __process_trace_bufs();
1280}
4b5db44a 1281
3fe0b570
AB
/*
 * Report a failed read of trace file @ifn on behalf of thread @cpu.
 * EAGAIN is not reported.
 */
static inline void read_err(int cpu, char *ifn)
{
	if (errno == EAGAIN)
		return;

	fprintf(stderr, "Thread %d failed read of %s: %d/%s\n",
		cpu, ifn, errno, strerror(errno));
}
1288
3fe0b570 1289static int net_sendfile(struct io_info *iop)
d5396421 1290{
3fe0b570 1291 int ret;
d5396421 1292
3fe0b570
AB
1293 ret = sendfile(iop->ofd, iop->ifd, NULL, iop->ready);
1294 if (ret < 0) {
1295 perror("sendfile");
1296 return 1;
1297 } else if (ret < (int)iop->ready) {
1298 fprintf(stderr, "short sendfile send (%d of %d)\n",
1299 ret, iop->ready);
1300 return 1;
1301 }
91816d54 1302
9db17354 1303 return 0;
91816d54
JA
1304}
1305
3fe0b570 1306static inline int net_sendfile_data(struct tracer *tp, struct io_info *iop)
d0ca268b 1307{
3fe0b570 1308 struct devpath *dpp = iop->dpp;
d0ca268b 1309
3fe0b570
AB
1310 if (net_send_header(iop->ofd, tp->cpu, dpp->buts_name, iop->ready))
1311 return 1;
1312 return net_sendfile(iop);
1313}
d0ca268b 1314
3fe0b570
AB
1315static int handle_pfds_netclient(struct tracer *tp, int nevs, int force_read)
1316{
1317 struct stat sb;
1318 int i, nentries = 0;
1319 struct pdc_stats *sp;
1320 struct pollfd *pfd = tp->pfds;
1321 struct io_info *iop = tp->ios;
1322
1323 for (i = 0; nevs > 0 && i < ndevs; i++, pfd++, iop++, sp++) {
1324 if (pfd->revents & POLLIN || force_read) {
1325 if (fstat(iop->ifd, &sb) < 0) {
1326 perror(iop->ifn);
1327 pfd->events = 0;
1328 } else if (sb.st_size > (off_t)iop->data_queued) {
1329 iop->ready = sb.st_size - iop->data_queued;
1330 iop->data_queued = sb.st_size;
1331 if (!net_sendfile_data(tp, iop)) {
1332 pdc_dr_update(iop->dpp, tp->cpu,
1333 iop->ready);
1334 nentries++;
1335 } else
1336 clear_events(pfd);
9db17354 1337 }
3fe0b570 1338 nevs--;
9db17354 1339 }
d0ca268b
JA
1340 }
1341
3fe0b570
AB
1342 if (nentries)
1343 incr_entries(nentries);
1344
1345 return nentries;
d0ca268b
JA
1346}
1347
3fe0b570 1348static int handle_pfds_entries(struct tracer *tp, int nevs, int force_read)
b7106311 1349{
3fe0b570
AB
1350 int i, nentries = 0;
1351 struct trace_buf *tbp;
1352 struct pollfd *pfd = tp->pfds;
1353 struct io_info *iop = tp->ios;
1354
1355 tbp = alloc_trace_buf(tp->cpu, buf_size);
1356 for (i = 0; i < ndevs; i++, pfd++, iop++) {
1357 if (pfd->revents & POLLIN || force_read) {
1358 tbp->len = read(iop->ifd, tbp->buf, buf_size);
1359 if (tbp->len > 0) {
1360 pdc_dr_update(iop->dpp, tp->cpu, tbp->len);
1361 add_trace_buf(iop->dpp, tp->cpu, &tbp);
1362 nentries++;
1363 } else if (tbp->len == 0) {
1364 /*
1365 * Short reads after we're done stop us
1366 * from trying reads.
1367 */
1368 if (tp->is_done)
1369 clear_events(pfd);
1370 } else {
1371 read_err(tp->cpu, iop->ifn);
1372 if (errno != EAGAIN || tp->is_done)
1373 clear_events(pfd);
1374 }
1375 if (!piped_output && --nevs == 0)
1376 break;
1377 }
b7106311 1378 }
3fe0b570 1379 free(tbp);
6a752c90 1380
3fe0b570
AB
1381 if (nentries)
1382 incr_entries(nentries);
1383
1384 return nentries;
b7106311
JA
1385}
1386
3fe0b570 1387static int fill_ofname(struct io_info *iop, int cpu)
8e86c98a 1388{
3fe0b570 1389 int len;
e3bf54d8 1390 struct stat sb;
3fe0b570 1391 char *dst = iop->ofn;
8e86c98a
JA
1392
1393 if (output_dir)
3fe0b570 1394 len = snprintf(iop->ofn, sizeof(iop->ofn), "%s/", output_dir);
dd870ef6 1395 else
3fe0b570 1396 len = snprintf(iop->ofn, sizeof(iop->ofn), "./");
8e86c98a 1397
e3bf54d8 1398 if (net_mode == Net_server) {
3fe0b570 1399 struct cl_conn *nc = iop->nc;
e0a1988b 1400
3fe0b570
AB
1401 len += sprintf(dst + len, "%s-", nc->ch->hostname);
1402 len += strftime(dst + len, 64, "%F-%T/",
1403 gmtime(&iop->dpp->cl_connect_time));
e3bf54d8
JA
1404 }
1405
3fe0b570 1406 if (stat(iop->ofn, &sb) < 0) {
e3bf54d8 1407 if (errno != ENOENT) {
3fe0b570
AB
1408 fprintf(stderr,
1409 "Destination dir %s stat failed: %d/%s\n",
1410 iop->ofn, errno, strerror(errno));
e3bf54d8
JA
1411 return 1;
1412 }
3fe0b570
AB
1413 if (mkdir(iop->ofn, 0755) < 0) {
1414 fprintf(stderr,
1415 "Destination dir %s can't be made: %d/%s\n",
1416 iop->ofn, errno, strerror(errno));
e3bf54d8
JA
1417 return 1;
1418 }
1419 }
1420
8e86c98a 1421 if (output_name)
3fe0b570
AB
1422 snprintf(iop->ofn + len, sizeof(iop->ofn), "%s.blktrace.%d",
1423 output_name, cpu);
8e86c98a 1424 else
3fe0b570
AB
1425 snprintf(iop->ofn + len, sizeof(iop->ofn), "%s.blktrace.%d",
1426 iop->dpp->buts_name, cpu);
e3bf54d8
JA
1427
1428 return 0;
8e86c98a
JA
1429}
1430
3fe0b570 1431static int set_vbuf(struct io_info *iop, int mode, size_t size)
0cc7d25e 1432{
3fe0b570
AB
1433 iop->obuf = malloc(size);
1434 if (setvbuf(iop->ofp, iop->obuf, mode, size) < 0) {
1435 fprintf(stderr, "setvbuf(%s, %d) failed: %d/%s\n",
1436 iop->dpp->path, (int)size, errno,
1437 strerror(errno));
1438 free(iop->obuf);
ddf22842
JA
1439 return 1;
1440 }
d5396421 1441
ddf22842
JA
1442 return 0;
1443}
007c233c 1444
3fe0b570 1445static int iop_open(struct io_info *iop, int cpu)
ddf22842 1446{
3fe0b570
AB
1447 iop->ofd = -1;
1448 if (fill_ofname(iop, cpu))
1449 return 1;
0cc7d25e 1450
3fe0b570
AB
1451 iop->ofp = my_fopen(iop->ofn, "w+");
1452 if (iop->ofp == NULL) {
1453 fprintf(stderr, "Open output file %s failed: %d/%s\n",
1454 iop->ofn, errno, strerror(errno));
1455 return 1;
1456 }
1457 if (set_vbuf(iop, _IOLBF, FILE_VBUF_SIZE)) {
1458 fprintf(stderr, "set_vbuf for file %s failed: %d/%s\n",
1459 iop->ofn, errno, strerror(errno));
1460 fclose(iop->ofp);
1461 return 1;
d0ca268b
JA
1462 }
1463
3fe0b570 1464 iop->ofd = fileno(iop->ofp);
e7c9f3ff 1465 return 0;
d0ca268b
JA
1466}
1467
3fe0b570 1468static int open_ios(struct tracer *tp)
3aabcd89 1469{
3fe0b570
AB
1470 struct pollfd *pfd;
1471 struct io_info *iop;
1472 struct list_head *p;
1473
1474 tp->ios = calloc(ndevs, sizeof(struct io_info));
1475 tp->pfds = calloc(ndevs, sizeof(struct pollfd));
1476
1477 memset(tp->ios, 0, ndevs * sizeof(struct io_info));
1478 memset(tp->pfds, 0, ndevs * sizeof(struct pollfd));
1479
1480 tp->nios = 0;
1481 iop = tp->ios;
1482 pfd = tp->pfds;
1483 __list_for_each(p, &devpaths) {
1484 struct devpath *dpp = list_entry(p, struct devpath, head);
1485
1486 iop->dpp = dpp;
1487 iop->ofd = -1;
1488 snprintf(iop->ifn, sizeof(iop->ifn), "%s/block/%s/trace%d",
1489 debugfs_path, dpp->buts_name, tp->cpu);
1490
1491 iop->ifd = my_open(iop->ifn, O_RDONLY | O_NONBLOCK);
1492 if (iop->ifd < 0) {
1493 fprintf(stderr, "Thread %d failed open %s: %d/%s\n",
1494 tp->cpu, iop->ifn, errno, strerror(errno));
1495 return 1;
1496 }
1497
1498 init_mmap_info(&iop->mmap_info);
1499
1500 pfd->fd = iop->ifd;
1501 pfd->events = POLLIN;
1502
1503 if (piped_output)
1504 ;
1505 else if (net_client_use_sendfile()) {
1506 iop->ofd = net_setup_client();
1507 if (iop->ofd < 0)
1508 goto err;
1509 net_send_open(iop->ofd, tp->cpu, dpp->buts_name);
1510 } else if (net_mode == Net_none) {
1511 if (iop_open(iop, tp->cpu))
1512 goto err;
1513 } else {
1514 /*
1515 * This ensures that the server knows about all
1516 * connections & devices before _any_ closes
1517 */
1518 net_send_open(cl_fds[tp->cpu], tp->cpu, dpp->buts_name);
1519 }
007c233c 1520
3fe0b570
AB
1521 pfd++;
1522 iop++;
1523 tp->nios++;
9db17354 1524 }
3aabcd89 1525
3fe0b570 1526 return 0;
72ca8801 1527
3fe0b570
AB
1528err:
1529 close(iop->ifd); /* tp->nios _not_ bumped */
1530 return 1;
e7c9f3ff
NS
1531}
1532
3fe0b570 1533static void close_iop(struct io_info *iop)
e7c9f3ff 1534{
3fe0b570 1535 struct mmap_info *mip = &iop->mmap_info;
007c233c 1536
3fe0b570
AB
1537 if (mip->fs_buf)
1538 munmap(mip->fs_buf, mip->fs_buf_len);
72ca8801 1539
3fe0b570
AB
1540 if (!piped_output) {
1541 if (ftruncate(fileno(iop->ofp), mip->fs_size) < 0) {
1542 fprintf(stderr,
1543 "Ignoring err: ftruncate(%s): %d/%s\n",
1544 iop->ofn, errno, strerror(errno));
1545 }
eb3c8108
JA
1546 }
1547
3fe0b570
AB
1548 if (iop->ofp)
1549 fclose(iop->ofp);
1550 if (iop->obuf)
1551 free(iop->obuf);
72ca8801
NS
1552}
1553
3fe0b570 1554static void close_ios(struct tracer *tp)
e7c9f3ff 1555{
3fe0b570
AB
1556 while (tp->nios > 0) {
1557 struct io_info *iop = &tp->ios[--tp->nios];
e7c9f3ff 1558
3fe0b570
AB
1559 iop->dpp->drops = get_drops(iop->dpp);
1560 if (iop->ifd >= 0)
1561 close(iop->ifd);
e7c9f3ff 1562
3fe0b570
AB
1563 if (iop->ofp)
1564 close_iop(iop);
1565 else if (iop->ofd >= 0) {
1566 struct devpath *dpp = iop->dpp;
d0ca268b 1567
3fe0b570
AB
1568 net_send_close(iop->ofd, dpp->buts_name, dpp->drops);
1569 net_close_connection(&iop->ofd);
e7c9f3ff
NS
1570 }
1571 }
99c1f5ab 1572
3fe0b570
AB
1573 free(tp->ios);
1574 free(tp->pfds);
e7c9f3ff
NS
1575}
1576
3fe0b570 1577static int setup_mmap(int fd, unsigned int maxlen, struct mmap_info *mip)
e7c9f3ff 1578{
3fe0b570
AB
1579 if (mip->fs_off + maxlen > mip->fs_buf_len) {
1580 unsigned long nr = max(16, mip->buf_nr);
e7c9f3ff 1581
3fe0b570
AB
1582 if (mip->fs_buf) {
1583 munlock(mip->fs_buf, mip->fs_buf_len);
1584 munmap(mip->fs_buf, mip->fs_buf_len);
1585 mip->fs_buf = NULL;
e7c9f3ff 1586 }
99c1f5ab 1587
3fe0b570
AB
1588 mip->fs_off = mip->fs_size & (mip->pagesize - 1);
1589 mip->fs_buf_len = (nr * mip->buf_size) - mip->fs_off;
1590 mip->fs_max_size += mip->fs_buf_len;
e7c9f3ff 1591
3fe0b570
AB
1592 if (ftruncate(fd, mip->fs_max_size) < 0) {
1593 perror("__setup_mmap: ftruncate");
1594 return 1;
e7c9f3ff 1595 }
99c1f5ab 1596
3fe0b570
AB
1597 mip->fs_buf = my_mmap(NULL, mip->fs_buf_len, PROT_WRITE,
1598 MAP_SHARED, fd,
1599 mip->fs_size - mip->fs_off);
1600 if (mip->fs_buf == MAP_FAILED) {
1601 perror("__setup_mmap: mmap");
1602 return 1;
1603 }
1604 my_mlock(mip->fs_buf, mip->fs_buf_len);
d0ca268b
JA
1605 }
1606
e7c9f3ff 1607 return 0;
d0ca268b
JA
1608}
1609
3fe0b570 1610static int handle_pfds_file(struct tracer *tp, int nevs, int force_read)
e7c9f3ff 1611{
3fe0b570
AB
1612 struct mmap_info *mip;
1613 int i, ret, nentries = 0;
1614 struct pollfd *pfd = tp->pfds;
1615 struct io_info *iop = tp->ios;
1616
1617 for (i = 0; nevs > 0 && i < ndevs; i++, pfd++, iop++) {
1618 if (pfd->revents & POLLIN || force_read) {
1619 mip = &iop->mmap_info;
1620
1621 ret = setup_mmap(iop->ofd, buf_size, mip);
1622 if (ret < 0) {
1623 pfd->events = 0;
1624 break;
1625 }
428683db 1626
3fe0b570
AB
1627 ret = read(iop->ifd, mip->fs_buf + mip->fs_off,
1628 buf_size);
1629 if (ret > 0) {
1630 pdc_dr_update(iop->dpp, tp->cpu, ret);
1631 mip->fs_size += ret;
1632 mip->fs_off += ret;
1633 nentries++;
1634 } else if (ret == 0) {
1635 /*
1636 * Short reads after we're done stop us
1637 * from trying reads.
1638 */
1639 if (tp->is_done)
1640 clear_events(pfd);
1641 } else {
1642 read_err(tp->cpu, iop->ifn);
1643 if (errno != EAGAIN || tp->is_done)
1644 clear_events(pfd);
1645 }
1646 nevs--;
e7c9f3ff 1647 }
e7c9f3ff 1648 }
56070ea4 1649
3fe0b570 1650 return nentries;
e7c9f3ff 1651}
52724a0e 1652
3fe0b570 1653static void *thread_main(void *arg)
8e86c98a 1654{
3fe0b570
AB
1655 int ret, ndone;
1656 int to_val;
8e86c98a 1657
3fe0b570 1658 struct tracer *tp = arg;
8e86c98a 1659
3fe0b570
AB
1660 ret = lock_on_cpu(tp->cpu);
1661 if (ret)
1662 goto err;
ff11d54c 1663
3fe0b570
AB
1664 ret = open_ios(tp);
1665 if (ret) {
1666 close_ios(tp);
1667 goto err;
8e86c98a
JA
1668 }
1669
3fe0b570
AB
1670 pthread_mutex_lock(&tp->mutex);
1671 tp->running = 1;
1672 pthread_cond_signal(&tp->cond);
1673 pthread_mutex_unlock(&tp->mutex);
6a6d3f0f 1674
3fe0b570
AB
1675 if (piped_output)
1676 to_val = 50; /* Frequent partial handles */
ff11d54c 1677 else
3fe0b570
AB
1678 to_val = 500; /* 1/2 second intervals */
1679
1680 while (!tp->is_done) {
1681 ndone = poll(tp->pfds, ndevs, to_val);
1682 if (ndone || piped_output)
1683 (void)handle_pfds(tp, ndone, piped_output);
1684 else if (ndone < 0 && errno != EINTR)
1685 fprintf(stderr, "Thread %d poll failed: %d/%s\n",
1686 tp->cpu, errno, strerror(errno));
1687 }
22cd0c02
JA
1688
1689 /*
3fe0b570 1690 * Trace is stopped, pull data until we get a short read
22cd0c02 1691 */
3fe0b570
AB
1692 while (handle_pfds(tp, ndevs, 1) > 0)
1693 ;
ff11d54c 1694
3fe0b570 1695 close_ios(tp);
8e86c98a 1696
3fe0b570
AB
1697err:
1698 pthread_mutex_lock(&tp->mutex);
1699 tp->running = 0;
1700 tp->status = ret;
1701 pthread_cond_signal(&tp->cond);
1702 pthread_mutex_unlock(&tp->mutex);
1703 return NULL;
22cd0c02
JA
1704}
1705
3fe0b570 1706static int start_tracer(int cpu)
22cd0c02 1707{
3fe0b570 1708 struct tracer *tp;
22cd0c02 1709
3fe0b570
AB
1710 tp = malloc(sizeof(*tp));
1711 memset(tp, 0, sizeof(*tp));
7ab2f837 1712
3fe0b570
AB
1713 INIT_LIST_HEAD(&tp->head);
1714 pthread_mutex_init(&tp->mutex, NULL);
1715 pthread_cond_init(&tp->cond, NULL);
1716 tp->running = 0;
1717 tp->status = 0;
1718 tp->cpu = cpu;
8e86c98a 1719
3fe0b570
AB
1720 if (pthread_create(&tp->thread, NULL, thread_main, tp)) {
1721 fprintf(stderr, "FAILED to start thread on CPU %d: %d/%s\n",
1722 cpu, errno, strerror(errno));
1723 goto err;
1724 }
8e86c98a 1725
3fe0b570
AB
1726 pthread_mutex_lock(&tp->mutex);
1727 while (!tp->running && (tp->status == 0))
1728 pthread_cond_wait(&tp->cond, &tp->mutex);
1729 pthread_mutex_unlock(&tp->mutex);
1730
1731 if (tp->status == 0) {
1732 list_add_tail(&tp->head, &tracers);
1733 return 0;
8e86c98a 1734 }
3fe0b570
AB
1735
1736 fprintf(stderr, "FAILED to start thread on CPU %d\n", cpu);
1737
1738err:
1739 pthread_mutex_destroy(&tp->mutex);
1740 pthread_cond_destroy(&tp->cond);
1741 free(tp);
1742 return 1;
8e86c98a
JA
1743}
1744
3fe0b570 1745static int start_tracers(void)
e0a1988b 1746{
3fe0b570
AB
1747 int cpu;
1748
1749 for (cpu = 0; cpu < ncpus; cpu++)
1750 if (start_tracer(cpu))
1751 break;
e0a1988b 1752
3fe0b570
AB
1753 return cpu;
1754}
e0a1988b 1755
3fe0b570
AB
1756static void stop_tracers(void)
1757{
1758 struct list_head *p;
e0a1988b
JA
1759
1760 /*
3fe0b570 1761 * Stop the tracing - makes the tracer threads clean up quicker.
e0a1988b 1762 */
3fe0b570
AB
1763 __list_for_each(p, &devpaths) {
1764 struct devpath *dpp = list_entry(p, struct devpath, head);
1765 (void)ioctl(dpp->fd, BLKTRACESTOP);
e0a1988b
JA
1766 }
1767
3fe0b570
AB
1768 /*
1769 * Tell each tracer to quit
1770 */
1771 __list_for_each(p, &tracers) {
1772 struct tracer *tp = list_entry(p, struct tracer, head);
1773 tp->is_done = 1;
1774 }
ff11d54c 1775}
e0a1988b 1776
3fe0b570 1777static void del_tracers(void)
ff11d54c 1778{
3fe0b570 1779 struct list_head *p, *q;
ff11d54c 1780
3fe0b570
AB
1781 list_for_each_safe(p, q, &tracers) {
1782 struct tracer *tp = list_entry(p, struct tracer, head);
ff11d54c 1783
3fe0b570
AB
1784 list_del(&tp->head);
1785 free(tp);
e0a1988b 1786 }
3fe0b570 1787 ntracers = 0;
ff11d54c 1788}
e0a1988b 1789
3fe0b570 1790static void wait_tracers(void)
ff11d54c 1791{
3fe0b570 1792 struct list_head *p;
ff11d54c 1793
3fe0b570
AB
1794 if (use_tracer_devpaths())
1795 process_trace_bufs();
1796
1797 __list_for_each(p, &tracers) {
1798 int ret;
1799 struct tracer *tp = list_entry(p, struct tracer, head);
1800
1801 pthread_mutex_lock(&tp->mutex);
1802 while (tp->running)
1803 pthread_cond_wait(&tp->cond, &tp->mutex);
1804 pthread_mutex_unlock(&tp->mutex);
1805
1806 ret = pthread_join(tp->thread, NULL);
1807 if (ret)
1808 fprintf(stderr, "Thread join %d failed %d\n",
1809 tp->cpu, ret);
ff11d54c
TZ
1810 }
1811
3fe0b570
AB
1812 if (use_tracer_devpaths())
1813 clean_trace_bufs();
1814
1815 get_all_drops();
ff11d54c
TZ
1816}
1817
/*
 * Orderly shutdown: ignore further termination signals, then stop the
 * tracers, wait for them, free them and release the device paths.
 */
static void exit_tracing(void)
{
	static const int ignored[] = { SIGINT, SIGHUP, SIGTERM, SIGALRM };
	unsigned int n;

	for (n = 0; n < sizeof(ignored) / sizeof(ignored[0]); n++)
		signal(ignored[n], SIG_IGN);

	stop_tracers();
	wait_tracers();
	del_tracers();
	rel_devpaths();
}
1830
3fe0b570 1831static void handle_sigint(__attribute__((__unused__)) int sig)
8e86c98a 1832{
3fe0b570
AB
1833 done = 1;
1834 stop_tracers();
8e86c98a
JA
1835}
1836
3fe0b570 1837static void show_stats(struct list_head *devpaths)
659bcc3f 1838{
3fe0b570
AB
1839 FILE *ofp;
1840 struct list_head *p;
1841 unsigned long long nevents, data_read;
1842 unsigned long long total_drops = 0;
1843 unsigned long long total_events = 0;
1844
1845 if (piped_output)
1846 ofp = my_fopen("/dev/null", "w");
1847 else
1848 ofp = stdout;
ff11d54c 1849
3fe0b570
AB
1850 __list_for_each(p, devpaths) {
1851 int cpu;
1852 struct pdc_stats *sp;
1853 struct devpath *dpp = list_entry(p, struct devpath, head);
e0a1988b 1854
3fe0b570
AB
1855 if (net_mode == Net_server)
1856 printf("server: end of run for %s:%s\n",
1857 dpp->ch->hostname, dpp->buts_name);
e0a1988b 1858
3fe0b570
AB
1859 data_read = 0;
1860 nevents = 0;
1861
1862 fprintf(ofp, "=== %s ===\n", dpp->buts_name);
1863 for (cpu = 0, sp = dpp->stats; cpu < dpp->ncpus; cpu++, sp++) {
1864 /*
1865 * Estimate events if not known...
1866 */
1867 if (sp->nevents == 0) {
1868 sp->nevents = sp->data_read /
1869 sizeof(struct blk_io_trace);
ff11d54c 1870 }
e0a1988b 1871
3fe0b570
AB
1872 fprintf(ofp,
1873 " CPU%3d: %20llu events, %8llu KiB data\n",
1874 cpu, sp->nevents, (sp->data_read + 1023) >> 10);
e0a1988b 1875
3fe0b570
AB
1876 data_read += sp->data_read;
1877 nevents += sp->nevents;
e0a1988b
JA
1878 }
1879
3fe0b570
AB
1880 fprintf(ofp, " Total: %20llu events (dropped %llu),"
1881 " %8llu KiB data\n", nevents,
1882 dpp->drops, (data_read + 1024) >> 10);
8e86c98a 1883
3fe0b570
AB
1884 total_drops += dpp->drops;
1885 total_events += (nevents + dpp->drops);
8e86c98a
JA
1886 }
1887
3fe0b570
AB
1888 fflush(ofp);
1889 if (piped_output)
1890 fclose(ofp);
8e86c98a 1891
3fe0b570
AB
1892 if (total_drops) {
1893 double drops_ratio = 1.0;
8e86c98a 1894
3fe0b570
AB
1895 if (total_events)
1896 drops_ratio = (double)total_drops/(double)total_events;
8e86c98a 1897
3fe0b570
AB
1898 fprintf(stderr, "\nYou have %llu (%5.1lf%%) dropped events\n"
1899 "Consider using a larger buffer size (-b) "
1900 "and/or more buffers (-n)\n",
1901 total_drops, 100.0 * drops_ratio);
8e86c98a 1902 }
8e86c98a
JA
1903}
1904
3fe0b570 1905static int handle_args(int argc, char *argv[])
8e86c98a 1906{
3fe0b570 1907 int c, i;
e3e74029 1908 struct statfs st;
d39c04ca
AB
1909 int act_mask_tmp = 0;
1910
1911 while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) >= 0) {
1912 switch (c) {
1913 case 'a':
1914 i = find_mask_map(optarg);
1915 if (i < 0) {
3fe0b570 1916 fprintf(stderr, "Invalid action mask %s\n",
d39c04ca 1917 optarg);
7425d456 1918 return 1;
d39c04ca
AB
1919 }
1920 act_mask_tmp |= i;
1921 break;
1922
1923 case 'A':
3fe0b570 1924 if ((sscanf(optarg, "%x", &i) != 1) ||
98f8386b 1925 !valid_act_opt(i)) {
d39c04ca 1926 fprintf(stderr,
ab197ca7 1927 "Invalid set action mask %s/0x%x\n",
d39c04ca 1928 optarg, i);
7425d456 1929 return 1;
d39c04ca
AB
1930 }
1931 act_mask_tmp = i;
1932 break;
d0ca268b 1933
d39c04ca 1934 case 'd':
3fe0b570 1935 if (add_devpath(optarg) != 0)
e7c9f3ff 1936 return 1;
d39c04ca
AB
1937 break;
1938
cf1edb17
AB
1939 case 'I': {
1940 char dev_line[256];
3fe0b570 1941 FILE *ifp = my_fopen(optarg, "r");
cf1edb17
AB
1942
1943 if (!ifp) {
3fe0b570
AB
1944 fprintf(stderr,
1945 "Invalid file for devices %s\n",
cf1edb17
AB
1946 optarg);
1947 return 1;
1948 }
1949
1950 while (fscanf(ifp, "%s\n", dev_line) == 1)
3fe0b570 1951 if (add_devpath(dev_line) != 0)
cf1edb17
AB
1952 return 1;
1953 break;
1954 }
cf1edb17 1955
5270dddd 1956 case 'r':
3d06efea 1957 debugfs_path = optarg;
5270dddd
JA
1958 break;
1959
d5396421 1960 case 'o':
66efebf8 1961 output_name = optarg;
d5396421 1962 break;
bc39777c
JA
1963 case 'k':
1964 kill_running_trace = 1;
1965 break;
ece238a6
NS
1966 case 'w':
1967 stop_watch = atoi(optarg);
1968 if (stop_watch <= 0) {
1969 fprintf(stderr,
1970 "Invalid stopwatch value (%d secs)\n",
1971 stop_watch);
1972 return 1;
1973 }
1974 break;
57ea8602 1975 case 'V':
5d4f19d9 1976 case 'v':
52724a0e 1977 printf("%s version %s\n", argv[0], blktrace_version);
3fe0b570
AB
1978 exit(0);
1979 /*NOTREACHED*/
129aa440 1980 case 'b':
eb3c8108 1981 buf_size = strtoul(optarg, NULL, 10);
183a0855 1982 if (buf_size <= 0 || buf_size > 16*1024) {
3fe0b570
AB
1983 fprintf(stderr, "Invalid buffer size (%lu)\n",
1984 buf_size);
129aa440
JA
1985 return 1;
1986 }
1987 buf_size <<= 10;
1988 break;
1989 case 'n':
eb3c8108 1990 buf_nr = strtoul(optarg, NULL, 10);
129aa440
JA
1991 if (buf_nr <= 0) {
1992 fprintf(stderr,
eb3c8108 1993 "Invalid buffer nr (%lu)\n", buf_nr);
129aa440
JA
1994 return 1;
1995 }
1996 break;
d1d7f15f
JA
1997 case 'D':
1998 output_dir = optarg;
1999 break;
8e86c98a
JA
2000 case 'h':
2001 net_mode = Net_client;
2002 strcpy(hostname, optarg);
2003 break;
2004 case 'l':
2005 net_mode = Net_server;
2006 break;
2007 case 'p':
2008 net_port = atoi(optarg);
2009 break;
32f18c48 2010 case 's':
79971f43 2011 net_use_sendfile = 0;
32f18c48 2012 break;
d39c04ca 2013 default:
ee1f4158 2014 show_usage(argv[0]);
3fe0b570
AB
2015 exit(1);
2016 /*NOTREACHED*/
d39c04ca
AB
2017 }
2018 }
2019
3fe0b570
AB
2020 while (optind < argc)
2021 if (add_devpath(argv[optind++]) != 0)
2022 return 1;
8e86c98a 2023
3fe0b570
AB
2024 if (net_mode != Net_server && ndevs == 0) {
2025 show_usage(argv[0]);
2026 return 1;
2027 }
8e86c98a 2028
3fe0b570
AB
2029 if (statfs(debugfs_path, &st) < 0 || st.f_type != (long)DEBUGFS_TYPE) {
2030 fprintf(stderr, "Invalid debug path %s: %d/%s\n",
2031 debugfs_path, errno, strerror(errno));
2032 return 1;
2033 }
2034
2035 if (act_mask_tmp != 0)
2036 act_mask = act_mask_tmp;
2037
e58f3937
AB
2038 if (net_mode == Net_client && net_setup_addr())
2039 return 1;
2040
3fe0b570
AB
2041 /*
2042 * Set up for appropriate PFD handler based upon output name.
2043 */
2044 if (net_client_use_sendfile())
2045 handle_pfds = handle_pfds_netclient;
2046 else if (net_client_use_send())
2047 handle_pfds = handle_pfds_entries;
2048 else if (output_name && (strcmp(output_name, "-") == 0)) {
2049 piped_output = 1;
2050 handle_pfds = handle_pfds_entries;
2051 pfp = stdout;
2052 setvbuf(pfp, NULL, _IONBF, 0);
2053 } else
2054 handle_pfds = handle_pfds_file;
2055 return 0;
2056}
2057
2058static void ch_add_connection(struct net_server_s *ns, struct cl_host *ch,
2059 int fd)
2060{
2061 struct cl_conn *nc;
2062
2063 nc = malloc(sizeof(*nc));
2064 memset(nc, 0, sizeof(*nc));
2065
2066 time(&nc->connect_time);
2067 nc->ch = ch;
2068 nc->fd = fd;
2069 nc->ncpus = -1;
2070
2071 list_add_tail(&nc->ch_head, &ch->conn_list);
2072 ch->connects++;
2073
2074 list_add_tail(&nc->ns_head, &ns->conn_list);
2075 ns->connects++;
2076 ns->pfds = realloc(ns->pfds, (ns->connects+1) * sizeof(struct pollfd));
2077}
2078
2079static void ch_rem_connection(struct net_server_s *ns, struct cl_host *ch,
2080 struct cl_conn *nc)
2081{
2082 net_close_connection(&nc->fd);
2083
2084 list_del(&nc->ch_head);
2085 ch->connects--;
2086
2087 list_del(&nc->ns_head);
2088 ns->connects--;
2089 ns->pfds = realloc(ns->pfds, (ns->connects+1) * sizeof(struct pollfd));
2090
2091 free(nc);
2092}
2093
2094static struct cl_host *net_find_client_host(struct net_server_s *ns,
2095 struct in_addr cl_in_addr)
2096{
2097 struct list_head *p;
2098
2099 __list_for_each(p, &ns->ch_list) {
2100 struct cl_host *ch = list_entry(p, struct cl_host, head);
2101
2102 if (in_addr_eq(ch->cl_in_addr, cl_in_addr))
2103 return ch;
2104 }
2105
2106 return NULL;
2107}
2108
2109static struct cl_host *net_add_client_host(struct net_server_s *ns,
2110 struct sockaddr_in *addr)
2111{
2112 struct cl_host *ch;
2113
2114 ch = malloc(sizeof(*ch));
2115 memset(ch, 0, sizeof(*ch));
2116
2117 ch->ns = ns;
2118 ch->cl_in_addr = addr->sin_addr;
2119 list_add_tail(&ch->head, &ns->ch_list);
2120 ns->nchs++;
ec685dd2 2121
3fe0b570
AB
2122 ch->hostname = strdup(inet_ntoa(addr->sin_addr));
2123 printf("server: connection from %s\n", ch->hostname);
2124
2125 INIT_LIST_HEAD(&ch->conn_list);
2126 INIT_LIST_HEAD(&ch->devpaths);
2127
2128 return ch;
2129}
2130
2131static void device_done(struct devpath *dpp, int ncpus)
2132{
2133 int cpu;
2134 struct io_info *iop;
2135
2136 for (cpu = 0, iop = dpp->ios; cpu < ncpus; cpu++, iop++)
2137 close_iop(iop);
2138
2139 list_del(&dpp->head);
2140 dpp_free(dpp);
2141}
2142
2143static void net_ch_remove(struct cl_host *ch, int ncpus)
2144{
2145 struct list_head *p, *q;
2146 struct net_server_s *ns = ch->ns;
2147
2148 list_for_each_safe(p, q, &ch->devpaths) {
2149 struct devpath *dpp = list_entry(p, struct devpath, head);
2150 device_done(dpp, ncpus);
ec685dd2 2151 }
8e86c98a 2152
3fe0b570
AB
2153 list_for_each_safe(p, q, &ch->conn_list) {
2154 struct cl_conn *nc = list_entry(p, struct cl_conn, ch_head);
2155
2156 ch_rem_connection(ns, ch, nc);
22cd0c02
JA
2157 }
2158
3fe0b570
AB
2159 list_del(&ch->head);
2160 ns->nchs--;
2161
2162 if (ch->hostname)
2163 free(ch->hostname);
2164 free(ch);
2165}
2166
2167static void net_add_connection(struct net_server_s *ns)
2168{
2169 int fd;
2170 struct cl_host *ch;
2171 socklen_t socklen = sizeof(ns->addr);
2172
d5302b03 2173 fd = my_accept(ns->listen_fd, (struct sockaddr *)&ns->addr, &socklen);
3fe0b570
AB
2174 if (fd < 0) {
2175 /*
2176 * This is OK: we just won't accept this connection,
2177 * nothing fatal.
2178 */
2179 perror("accept");
2180 } else {
2181 ch = net_find_client_host(ns, ns->addr.sin_addr);
2182 if (!ch)
2183 ch = net_add_client_host(ns, &ns->addr);
2184
2185 ch_add_connection(ns, ch, fd);
d39c04ca 2186 }
3fe0b570 2187}
d39c04ca 2188
3fe0b570
AB
2189static struct devpath *nc_add_dpp(struct cl_conn *nc,
2190 struct blktrace_net_hdr *bnh,
2191 time_t connect_time)
2192{
2193 int cpu;
2194 struct io_info *iop;
2195 struct devpath *dpp;
2196
2197 dpp = malloc(sizeof(*dpp));
2198 memset(dpp, 0, sizeof(*dpp));
2199
2200 dpp->buts_name = strdup(bnh->buts_name);
2201 dpp->path = strdup(bnh->buts_name);
2202 dpp->fd = -1;
2203 dpp->ch = nc->ch;
2204 dpp->cl_id = bnh->cl_id;
2205 dpp->cl_connect_time = connect_time;
2206 dpp->ncpus = nc->ncpus;
2207 dpp->stats = calloc(dpp->ncpus, sizeof(*dpp->stats));
2208 memset(dpp->stats, 0, dpp->ncpus * sizeof(*dpp->stats));
2209
2210 list_add_tail(&dpp->head, &nc->ch->devpaths);
2211 nc->ch->ndevs++;
2212
2213 dpp->ios = calloc(nc->ncpus, sizeof(*iop));
2214 memset(dpp->ios, 0, ndevs * sizeof(*iop));
2215
2216 for (cpu = 0, iop = dpp->ios; cpu < nc->ncpus; cpu++, iop++) {
2217 iop->dpp = dpp;
2218 iop->nc = nc;
2219 init_mmap_info(&iop->mmap_info);
2220
2221 if (iop_open(iop, cpu))
2222 goto err;
69dd57c2
AB
2223 }
2224
3fe0b570 2225 return dpp;
69dd57c2 2226
3fe0b570
AB
2227err:
2228 /*
2229 * Need to unravel what's been done...
2230 */
2231 while (cpu >= 0)
2232 close_iop(&dpp->ios[cpu--]);
2233 dpp_free(dpp);
2234
2235 return NULL;
2236}
d0ca268b 2237
3fe0b570
AB
2238static struct devpath *nc_find_dpp(struct cl_conn *nc,
2239 struct blktrace_net_hdr *bnh)
2240{
2241 struct list_head *p;
2242 time_t connect_time = nc->connect_time;
3d06efea 2243
3fe0b570
AB
2244 __list_for_each(p, &nc->ch->devpaths) {
2245 struct devpath *dpp = list_entry(p, struct devpath, head);
2246
2247 if (!strcmp(dpp->buts_name, bnh->buts_name))
2248 return dpp;
2249
2250 if (dpp->cl_id == bnh->cl_id)
2251 connect_time = dpp->cl_connect_time;
d0ca268b
JA
2252 }
2253
3fe0b570
AB
2254 return nc_add_dpp(nc, bnh, connect_time);
2255}
bc39777c 2256
3fe0b570
AB
2257static void net_client_read_data(struct cl_conn *nc, struct devpath *dpp,
2258 struct blktrace_net_hdr *bnh)
2259{
2260 int ret;
2261 struct io_info *iop = &dpp->ios[bnh->cpu];
2262 struct mmap_info *mip = &iop->mmap_info;
2263
2264 if (setup_mmap(iop->ofd, bnh->len, &iop->mmap_info)) {
2265 fprintf(stderr, "ncd(%s:%d): mmap failed\n",
2266 nc->ch->hostname, nc->fd);
2267 exit(1);
2268 }
2269
2270 ret = net_recv_data(nc->fd, mip->fs_buf + mip->fs_off, bnh->len);
2271 if (ret > 0) {
2272 pdc_dr_update(dpp, bnh->cpu, ret);
2273 mip->fs_size += ret;
2274 mip->fs_off += ret;
2275 } else if (ret < 0)
2276 exit(1);
2277}
2278
2279/*
2280 * Returns 1 if we closed a host - invalidates other polling information
2281 * that may be present.
2282 */
2283static int net_client_data(struct cl_conn *nc)
2284{
2285 int ret;
2286 struct devpath *dpp;
2287 struct blktrace_net_hdr bnh;
2288
2289 ret = net_get_header(nc, &bnh);
2290 if (ret == 0)
7425d456 2291 return 0;
3fe0b570
AB
2292
2293 if (ret < 0) {
2294 fprintf(stderr, "ncd(%d): header read failed\n", nc->fd);
2295 exit(1);
2296 }
2297
2298 if (data_is_native == -1 && check_data_endianness(bnh.magic)) {
2299 fprintf(stderr, "ncd(%d): received data is bad\n", nc->fd);
2300 exit(1);
2301 }
2302
2303 if (!data_is_native) {
2304 bnh.magic = be32_to_cpu(bnh.magic);
2305 bnh.cpu = be32_to_cpu(bnh.cpu);
2306 bnh.max_cpus = be32_to_cpu(bnh.max_cpus);
2307 bnh.len = be32_to_cpu(bnh.len);
2308 bnh.cl_id = be32_to_cpu(bnh.cl_id);
2309 bnh.buf_size = be32_to_cpu(bnh.buf_size);
2310 bnh.buf_nr = be32_to_cpu(bnh.buf_nr);
2311 bnh.page_size = be32_to_cpu(bnh.page_size);
2312 }
2313
2314 if ((bnh.magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) {
2315 fprintf(stderr, "ncd(%s:%d): bad data magic\n",
2316 nc->ch->hostname, nc->fd);
2317 exit(1);
2318 }
2319
2320 if (nc->ncpus == -1)
2321 nc->ncpus = bnh.max_cpus;
2322
2323 /*
2324 * len == 0 means the other end is sending us a new connection/dpp
2325 * len == 1 means that the other end signalled end-of-run
2326 */
2327 dpp = nc_find_dpp(nc, &bnh);
2328 if (bnh.len == 0) {
2329 /*
2330 * Just adding in the dpp above is enough
2331 */
2332 ack_open_close(nc->fd, dpp->buts_name);
2333 nc->ch->cl_opens++;
2334 } else if (bnh.len == 1) {
2335 /*
2336 * overload cpu count with dropped events
2337 */
2338 dpp->drops = bnh.cpu;
2339
2340 ack_open_close(nc->fd, dpp->buts_name);
2341 if (--nc->ch->cl_opens == 0) {
2342 show_stats(&nc->ch->devpaths);
2343 net_ch_remove(nc->ch, nc->ncpus);
2344 return 1;
2345 }
2346 } else
2347 net_client_read_data(nc, dpp, &bnh);
2348
2349 return 0;
2350}
2351
2352static void handle_client_data(struct net_server_s *ns, int events)
2353{
2354 struct cl_conn *nc;
2355 struct pollfd *pfd;
2356 struct list_head *p, *q;
2357
2358 pfd = &ns->pfds[1];
2359 list_for_each_safe(p, q, &ns->conn_list) {
2360 if (pfd->revents & POLLIN) {
2361 nc = list_entry(p, struct cl_conn, ns_head);
2362
2363 if (net_client_data(nc) || --events == 0)
2364 break;
2365 }
2366 pfd++;
2367 }
2368}
2369
2370static void net_setup_pfds(struct net_server_s *ns)
2371{
2372 struct pollfd *pfd;
2373 struct list_head *p;
2374
2375 ns->pfds[0].fd = ns->listen_fd;
2376 ns->pfds[0].events = POLLIN;
2377
2378 pfd = &ns->pfds[1];
2379 __list_for_each(p, &ns->conn_list) {
2380 struct cl_conn *nc = list_entry(p, struct cl_conn, ns_head);
2381
2382 pfd->fd = nc->fd;
2383 pfd->events = POLLIN;
2384 pfd++;
2385 }
2386}
2387
2388static int net_server_handle_connections(struct net_server_s *ns)
2389{
2390 int events;
2391
2392 printf("server: waiting for connections...\n");
2393
2394 while (!done) {
2395 net_setup_pfds(ns);
2396 events = poll(ns->pfds, ns->connects + 1, -1);
2397 if (events < 0) {
2398 if (errno != EINTR) {
2399 perror("FATAL: poll error");
2400 return 1;
2401 }
2402 } else if (events > 0) {
2403 if (ns->pfds[0].revents & POLLIN) {
2404 net_add_connection(ns);
2405 events--;
2406 }
2407
2408 if (events)
2409 handle_client_data(ns, events);
2410 }
2411 }
2412
2413 return 0;
2414}
2415
2416static int net_server(void)
2417{
2418 int fd, opt;
2419 int ret = 1;
2420 struct net_server_s net_server;
2421 struct net_server_s *ns = &net_server;
2422
2423 memset(ns, 0, sizeof(*ns));
2424 INIT_LIST_HEAD(&ns->ch_list);
2425 INIT_LIST_HEAD(&ns->conn_list);
2426 ns->pfds = malloc(sizeof(struct pollfd));
2427
2428 fd = my_socket(AF_INET, SOCK_STREAM, 0);
2429 if (fd < 0) {
2430 perror("server: socket");
2431 goto out;
2432 }
2433
2434 opt = 1;
2435 if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) < 0) {
2436 perror("setsockopt");
2437 goto out;
2438 }
2439
2440 memset(&ns->addr, 0, sizeof(ns->addr));
2441 ns->addr.sin_family = AF_INET;
2442 ns->addr.sin_addr.s_addr = htonl(INADDR_ANY);
2443 ns->addr.sin_port = htons(net_port);
2444
2445 if (bind(fd, (struct sockaddr *) &ns->addr, sizeof(ns->addr)) < 0) {
2446 perror("bind");
2447 goto out;
2448 }
2449
2450 if (listen(fd, 1) < 0) {
2451 perror("listen");
2452 goto out;
2453 }
2454
2455 /*
2456 * The actual server looping is done here:
2457 */
2458 ns->listen_fd = fd;
2459 ret = net_server_handle_connections(ns);
2460
2461 /*
2462 * Clean up and return...
2463 */
2464out:
2465 free(ns->pfds);
2466 return ret;
2467}
2468
2469int main(int argc, char *argv[])
2470{
2471 int ret = 0;
2472
2473 setlocale(LC_NUMERIC, "en_US");
2474 pagesize = getpagesize();
2475 ncpus = sysconf(_SC_NPROCESSORS_ONLN);
2476 if (ncpus < 0) {
2477 fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed %d/%s\n",
2478 errno, strerror(errno));
2479 ret = 1;
2480 goto out;
2481 }
2482
2483 if (handle_args(argc, argv)) {
2484 ret = 1;
2485 goto out;
bc39777c
JA
2486 }
2487
d0ca268b
JA
2488 signal(SIGINT, handle_sigint);
2489 signal(SIGHUP, handle_sigint);
2490 signal(SIGTERM, handle_sigint);
ece238a6 2491 signal(SIGALRM, handle_sigint);
38e1f0c6 2492 signal(SIGPIPE, SIG_IGN);
d0ca268b 2493
3fe0b570
AB
2494 if (kill_running_trace) {
2495 struct devpath *dpp;
2496 struct list_head *p;
8e86c98a 2497
3fe0b570
AB
2498 __list_for_each(p, &devpaths) {
2499 dpp = list_entry(p, struct devpath, head);
2500 if (__stop_trace(dpp->fd)) {
2501 fprintf(stderr,
2502 "BLKTRACETEARDOWN %s failed: %d/%s\n",
2503 dpp->path, errno, strerror(errno));
2504 }
2505 }
2506 } else if (net_mode == Net_server) {
2507 if (output_name) {
2508 fprintf(stderr, "-o ignored in server mode\n");
2509 output_name = NULL;
2510 }
8e86c98a 2511
3fe0b570
AB
2512 ret = net_server();
2513 } else {
2514 atexit(exit_tracing);
830fd65c 2515
3fe0b570
AB
2516 if (net_mode == Net_client)
2517 printf("blktrace: connecting to %s\n", hostname);
ece238a6 2518
3fe0b570 2519 setup_buts();
d0ca268b 2520
3fe0b570
AB
2521 if (use_tracer_devpaths()) {
2522 if (setup_tracer_devpaths())
2523 goto out;
d0ca268b 2524
3fe0b570
AB
2525 if (piped_output)
2526 handle_list = handle_list_file;
2527 else
2528 handle_list = handle_list_net;
2529 }
eb3c8108 2530
3fe0b570
AB
2531 ntracers = start_tracers();
2532 if (ntracers != ncpus)
2533 stop_tracers();
2534 else {
2535 if (net_mode == Net_client)
2536 printf("blktrace: connected!\n");
2537 if (stop_watch)
2538 alarm(stop_watch);
2539 }
2540
2541 wait_tracers();
2542 if (ntracers == ncpus)
2543 show_stats(&devpaths);
2544
2545 if (net_client_use_send())
2546 close_client_connections();
2547 del_tracers();
2548 }
d0ca268b 2549
3fe0b570
AB
2550out:
2551 if (pfp)
2552 fclose(pfp);
2553 rel_devpaths();
2554 return ret;
2555}