blkiomon: Fix an output error
[blktrace.git] / blktrace.c
CommitLineData
d0ca268b
JA
1/*
2 * block queue tracing application
3 *
d956a2cd 4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
46e37c55 5 * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
d956a2cd 6 *
3fe0b570
AB
7 * Rewrite to have a single thread per CPU (managing all devices on that CPU)
8 * Alan D. Brunelle <alan.brunelle@hp.com> - January 2009
9 *
d956a2cd
JA
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 *
d0ca268b 24 */
3fe0b570
AB
25
26#include <errno.h>
27#include <stdarg.h>
28#include <stdio.h>
29#include <stdlib.h>
30#include <string.h>
31#include <fcntl.h>
32#include <getopt.h>
33#include <sched.h>
d0ca268b 34#include <unistd.h>
3fe0b570 35#include <poll.h>
d0ca268b 36#include <signal.h>
3fe0b570
AB
37#include <pthread.h>
38#include <locale.h>
d0ca268b 39#include <sys/ioctl.h>
3fe0b570
AB
40#include <sys/types.h>
41#include <sys/stat.h>
42#include <sys/vfs.h>
b7106311 43#include <sys/mman.h>
3fe0b570
AB
44#include <sys/param.h>
45#include <sys/time.h>
46#include <sys/resource.h>
8e86c98a 47#include <sys/socket.h>
8e86c98a
JA
48#include <netinet/in.h>
49#include <arpa/inet.h>
50#include <netdb.h>
32f18c48 51#include <sys/sendfile.h>
d0ca268b 52
3fe0b570 53#include "btt/list.h"
d0ca268b 54#include "blktrace.h"
52724a0e 55
8f551a39
JA
/*
 * Relay sub-buffer defaults. Raise these if you trace at a high event
 * rate and observe skipped/missed events.
 */
#define BUF_SIZE		(512 * 1024)
#define BUF_NR			(4)

/* Size used for FILE stream buffers */
#define FILE_VBUF_SIZE		(128 * 1024)

/* Filesystem type magic for debugfs and the default network port */
#define DEBUGFS_TYPE		(0x64626720)
#define TRACE_NET_PORT		(8462)
67
/*
 * Networking mode of this process (value kept in net_mode).
 */
enum {
	Net_none	= 0,	/* default: no networking */
	Net_server,
	Net_client,
};
73
df81fdb5
AB
/*
 * State a tracer thread reports through tracer_signal_ready().
 */
enum thread_status {
	Th_running,	/* counted in nthreads_running */
	Th_leaving,	/* counted in nthreads_leaving */
	Th_error	/* counted in nthreads_error */
};
79
3fe0b570
AB
/*
 * Generic per-device, per-CPU statistics. nevents can be _roughly_
 * estimated from data_read (discounting the pdu...).
 *
 * Updated through pdc_dr_update() and pdc_nev_update() below.
 */
struct pdc_stats {
	unsigned long long data_read;	/* running total, see pdc_dr_update() */
	unsigned long long nevents;	/* running total, see pdc_nev_update() */
};
90
91struct devpath {
92 struct list_head head;
93 char *path; /* path to device special file */
94 char *buts_name; /* name returned from bt kernel code */
95 struct pdc_stats *stats;
96 int fd, idx, ncpus;
97 unsigned long long drops;
98
99 /*
100 * For piped output only:
101 *
102 * Each tracer will have a tracer_devpath_head that it will add new
103 * data onto. It's list is protected above (tracer_devpath_head.mutex)
104 * and it will signal the processing thread using the dp_cond,
105 * dp_mutex & dp_entries variables above.
106 */
107 struct tracer_devpath_head *heads;
108
109 /*
110 * For network server mode only:
111 */
112 struct cl_host *ch;
113 u32 cl_id;
114 time_t cl_connect_time;
115 struct io_info *ios;
116};
117
118/*
119 * For piped output to stdout we will have each tracer thread (one per dev)
120 * tack buffers read from the relay queues on a per-device list.
121 *
122 * The main thread will then collect trace buffers from each of lists in turn.
123 *
124 * We will use a mutex to guard each of the trace_buf list. The tracers
125 * can then signal the main thread using <dp_cond,dp_mutex> and
126 * dp_entries. (When dp_entries is 0, and a tracer adds an entry it will
127 * signal. When dp_entries is 0, the main thread will wait for that condition
128 * to be signalled.)
129 *
130 * adb: It may be better just to have a large buffer per tracer per dev,
131 * and then use it as a ring-buffer. This would certainly cut down a lot
132 * of malloc/free thrashing, at the cost of more memory movements (potentially).
133 */
134struct trace_buf {
135 struct list_head head;
136 struct devpath *dpp;
137 void *buf;
138 int cpu, len;
139};
140
141struct tracer_devpath_head {
142 pthread_mutex_t mutex;
143 struct list_head head;
144 struct trace_buf *prev;
145};
146
/*
 * State for the mmap() interface to an output file (containing traces).
 */
struct mmap_info {
	void *fs_buf;				/* current mapping, if any */
	unsigned long long fs_size;
	unsigned long long fs_max_size;		/* length passed to ftruncate() */
	unsigned long long fs_off;		/* offset into fs_buf */
	unsigned long long fs_buf_len;		/* length of fs_buf mapping */
	unsigned long buf_size;
	unsigned long buf_nr;
	int pagesize;
};
156
157/*
158 * Each thread doing work on a (client) side of blktrace will have one
159 * of these. The ios array contains input/output information, pfds holds
160 * poll() data. The volatile's provide flags to/from the main executing
161 * thread.
162 */
163struct tracer {
164 struct list_head head;
165 struct io_info *ios;
166 struct pollfd *pfds;
167 pthread_t thread;
3fe0b570 168 int cpu, nios;
df81fdb5 169 volatile int status, is_done;
3fe0b570
AB
170};
171
172/*
173 * networking stuff follows. we include a magic number so we know whether
174 * to endianness convert or not.
175 *
176 * The len field is overloaded:
177 * 0 - Indicates an "open" - allowing the server to set up for a dev/cpu
178 * 1 - Indicates a "close" - Shut down connection orderly
179 *
180 * The cpu field is overloaded on close: it will contain the number of drops.
181 */
182struct blktrace_net_hdr {
183 u32 magic; /* same as trace magic */
184 char buts_name[32]; /* trace name */
185 u32 cpu; /* for which cpu */
186 u32 max_cpus;
187 u32 len; /* length of following trace data */
188 u32 cl_id; /* id for set of client per-cpu connections */
189 u32 buf_size; /* client buf_size for this trace */
190 u32 buf_nr; /* client buf_nr for this trace */
191 u32 page_size; /* client page_size for this trace */
192};
193
194/*
195 * Each host encountered has one of these. The head is used to link this
196 * on to the network server's ch_list. Connections associated with this
197 * host are linked on conn_list, and any devices traced on that host
198 * are connected on the devpaths list.
199 */
200struct cl_host {
201 struct list_head head;
202 struct list_head conn_list;
203 struct list_head devpaths;
204 struct net_server_s *ns;
205 char *hostname;
206 struct in_addr cl_in_addr;
207 int connects, ndevs, cl_opens;
208};
209
210/*
211 * Each connection (client to server socket ('fd')) has one of these. A
212 * back reference to the host ('ch'), and lists headers (for the host
213 * list, and the network server conn_list) are also included.
214 */
215struct cl_conn {
216 struct list_head ch_head, ns_head;
217 struct cl_host *ch;
218 int fd, ncpus;
219 time_t connect_time;
220};
221
222/*
223 * The network server requires some poll structures to be maintained -
224 * one per conection currently on conn_list. The nchs/ch_list values
225 * are for each host connected to this server. The addr field is used
226 * for scratch as new connections are established.
227 */
228struct net_server_s {
229 struct list_head conn_list;
230 struct list_head ch_list;
231 struct pollfd *pfds;
232 int listen_fd, connects, nchs;
233 struct sockaddr_in addr;
234};
235
236/*
237 * This structure is (generically) used to providide information
238 * for a read-to-write set of values.
239 *
240 * ifn & ifd represent input information
241 *
242 * ofn, ofd, ofp, obuf & mmap_info are used for output file (optionally).
243 */
244struct io_info {
245 struct devpath *dpp;
246 FILE *ofp;
247 char *obuf;
248 struct cl_conn *nc; /* Server network connection */
249
250 /*
251 * mmap controlled output files
252 */
253 struct mmap_info mmap_info;
254
255 /*
256 * Client network fields
257 */
258 unsigned int ready;
259 unsigned long long data_queued;
260
261 /*
262 * Input/output file descriptors & names
263 */
264 int ifd, ofd;
265 char ifn[MAXPATHLEN + 64];
266 char ofn[MAXPATHLEN + 64];
267};
268
269static char blktrace_version[] = "2.0.0";
270
271/*
272 * Linkage to blktrace helper routines (trace conversions)
273 */
274int data_is_native = -1;
275
055cc3e5 276static int ndevs;
3fe0b570
AB
277static int ncpus;
278static int pagesize;
279static int act_mask = ~0U;
055cc3e5
AB
280static int kill_running_trace;
281static int stop_watch;
282static int piped_output;
283
3fe0b570
AB
284static char *debugfs_path = "/sys/kernel/debug";
285static char *output_name;
286static char *output_dir;
055cc3e5 287
3fe0b570
AB
288static unsigned long buf_size = BUF_SIZE;
289static unsigned long buf_nr = BUF_NR;
055cc3e5
AB
290
291static FILE *pfp;
292
3fe0b570
AB
293static LIST_HEAD(devpaths);
294static LIST_HEAD(tracers);
055cc3e5 295
3fe0b570 296static volatile int done;
d0ca268b 297
6488ca48
AB
298/*
299 * tracer threads add entries, the main thread takes them off and processes
300 * them. These protect the dp_entries variable.
301 */
3fe0b570
AB
302static pthread_cond_t dp_cond = PTHREAD_COND_INITIALIZER;
303static pthread_mutex_t dp_mutex = PTHREAD_MUTEX_INITIALIZER;
304static volatile int dp_entries;
305
6488ca48 306/*
df81fdb5 307 * These synchronize master / thread interactions.
6488ca48 308 */
df81fdb5
AB
309static pthread_cond_t mt_cond = PTHREAD_COND_INITIALIZER;
310static pthread_mutex_t mt_mutex = PTHREAD_MUTEX_INITIALIZER;
311static volatile int nthreads_running;
312static volatile int nthreads_leaving;
313static volatile int nthreads_error;
314static volatile int tracers_run;
6488ca48 315
3fe0b570
AB
316/*
317 * network cmd line params
318 */
e58f3937 319static struct sockaddr_in hostname_addr;
3fe0b570
AB
320static char hostname[MAXHOSTNAMELEN];
321static int net_port = TRACE_NET_PORT;
322static int net_use_sendfile = 1;
323static int net_mode;
324static int *cl_fds;
007c233c 325
3fe0b570
AB
326static int (*handle_pfds)(struct tracer *, int, int);
327static int (*handle_list)(struct tracer_devpath_head *, struct list_head *);
e3e74029 328
/*
 * Command line options: S_OPTS is the getopt short-option string, and
 * l_opts maps every long option name onto its short-option character.
 * Initializers are positional: { name, has_arg, flag, val }.
 */
#define S_OPTS	"d:a:A:r:o:kw:vVb:n:D:lh:p:sI:"
static struct option l_opts[] = {
	{ "dev",		required_argument,	NULL, 'd' },
	{ "input-devs",		required_argument,	NULL, 'I' },
	{ "act-mask",		required_argument,	NULL, 'a' },
	{ "set-mask",		required_argument,	NULL, 'A' },
	{ "relay",		required_argument,	NULL, 'r' },
	{ "output",		required_argument,	NULL, 'o' },
	{ "kill",		no_argument,		NULL, 'k' },
	{ "stopwatch",		required_argument,	NULL, 'w' },
	{ "version",		no_argument,		NULL, 'v' },
	{ "version",		no_argument,		NULL, 'V' },
	{ "buffer-size",	required_argument,	NULL, 'b' },
	{ "num-sub-buffers",	required_argument,	NULL, 'n' },
	{ "output-dir",		required_argument,	NULL, 'D' },
	{ "listen",		no_argument,		NULL, 'l' },
	{ "host",		required_argument,	NULL, 'h' },
	{ "port",		required_argument,	NULL, 'p' },
	{ "no-sendfile",	no_argument,		NULL, 's' },
	{ NULL,			0,			NULL, 0 }	/* terminator */
};
437
b9a7e9fc
ES
/*
 * Usage text printed by show_usage(): a synopsis line per option followed
 * by a one-line description per option.
 *
 * The synopsis agrees with l_opts/S_OPTS: -D and -b take an argument and
 * close their bracket, while -v and -V take no argument.
 */
static char usage_str[] = "\n\n"
	"-d <dev>             | --dev=<dev>\n"
	"[ -r <debugfs path>  | --relay=<debugfs path> ]\n"
	"[ -o <file>          | --output=<file>]\n"
	"[ -D <dir>           | --output-dir=<dir>]\n"
	"[ -w <time>          | --stopwatch=<time>]\n"
	"[ -a <action field>  | --act-mask=<action field>]\n"
	"[ -A <action mask>   | --set-mask=<action mask>]\n"
	"[ -b <size>          | --buffer-size=<size>]\n"
	"[ -n <number>        | --num-sub-buffers=<number>]\n"
	"[ -l                 | --listen]\n"
	"[ -h <hostname>      | --host=<hostname>]\n"
	"[ -p <port number>   | --port=<port number>]\n"
	"[ -s                 | --no-sendfile]\n"
	"[ -I <devs file>     | --input-devs=<devs file>]\n"
	"[ -v                 | --version]\n"
	"[ -V                 | --version]\n"

	"\t-d Use specified device. May also be given last after options\n"
	"\t-r Path to mounted debugfs, defaults to /sys/kernel/debug\n"
	"\t-o File(s) to send output to\n"
	"\t-D Directory to prepend to output file names\n"
	"\t-w Stop after defined time, in seconds\n"
	"\t-a Only trace specified actions. See documentation\n"
	"\t-A Give trace mask as a single value. See documentation\n"
	"\t-b Sub buffer size in KiB\n"
	"\t-n Number of sub buffers\n"
	"\t-l Run in network listen mode (blktrace server)\n"
	"\t-h Run in network client mode, connecting to the given host\n"
	"\t-p Network port to use (default 8462)\n"
	"\t-s Make the network client NOT use sendfile() to transfer data\n"
	"\t-I Add devices found in <devs file>\n"
	"\t-v Print program version info\n"
	"\t-V Print program version info\n\n";
9db17354 472
3fe0b570
AB
/*
 * Reset both the requested and the returned event masks of a poll entry.
 * The descriptor itself is left alone.
 */
static void clear_events(struct pollfd *pfd)
{
	pfd->events = pfd->revents = 0;
}
21f55651 478
3fe0b570
AB
479static inline int net_client_use_sendfile(void)
480{
481 return net_mode == Net_client && net_use_sendfile;
482}
21f55651 483
3fe0b570
AB
484static inline int net_client_use_send(void)
485{
486 return net_mode == Net_client && !net_use_sendfile;
487}
b9d4294e 488
3fe0b570
AB
489static inline int use_tracer_devpaths(void)
490{
491 return piped_output || net_client_use_send();
492}
b9d4294e 493
3fe0b570
AB
/*
 * Returns 1 when the two IPv4 addresses are identical, 0 otherwise.
 */
static inline int in_addr_eq(struct in_addr a, struct in_addr b)
{
	return (a.s_addr == b.s_addr) ? 1 : 0;
}
007c233c 498
3fe0b570
AB
499static inline void pdc_dr_update(struct devpath *dpp, int cpu, int data_read)
500{
501 dpp->stats[cpu].data_read += data_read;
502}
0cc7d25e 503
3fe0b570
AB
504static inline void pdc_nev_update(struct devpath *dpp, int cpu, int nevents)
505{
506 dpp->stats[cpu].nevents += nevents;
507}
9db17354 508
3fe0b570
AB
509static void show_usage(char *prog)
510{
bc14c53f 511 fprintf(stderr, "Usage: %s %s", prog, usage_str);
3fe0b570 512}
9db17354 513
df81fdb5
AB
/*
 * Fill in *tsp with the absolute time 'delta_msec' milliseconds from now
 * (per gettimeofday()).
 *
 * The result is always normalized so that 0 <= tv_nsec < 1000000000:
 * a tv_nsec of exactly one second must carry into tv_sec, otherwise
 * pthread_cond_timedwait() rejects the timespec. The delta is split into
 * whole seconds and a sub-second remainder first, so the nanosecond
 * arithmetic cannot overflow a 32-bit long.
 */
static inline void make_timespec(struct timespec *tsp, long delta_msec)
{
	struct timeval now;

	gettimeofday(&now, NULL);

	tsp->tv_sec = now.tv_sec + delta_msec / 1000L;
	tsp->tv_nsec = 1000L * now.tv_usec + (delta_msec % 1000L) * 1000000L;

	if (tsp->tv_nsec >= 1000000000L) {
		tsp->tv_sec++;
		tsp->tv_nsec -= 1000000000L;
	} else if (tsp->tv_nsec < 0) {
		/* only reachable with a negative delta */
		tsp->tv_sec--;
		tsp->tv_nsec += 1000000000L;
	}
}
533
/*
 * Condition wait that is bounded by a 50 msec timer, so the wait is
 * guaranteed to end even if no signal arrives. The result of the timed
 * wait is not reported to the caller.
 */
static void t_pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex)
{
	struct timespec deadline;

	make_timespec(&deadline, 50);
	(void) pthread_cond_timedwait(cond, mutex, &deadline);
}
544
545static void unblock_tracers(void)
546{
547 pthread_mutex_lock(&mt_mutex);
548 tracers_run = 1;
549 pthread_cond_broadcast(&mt_cond);
550 pthread_mutex_unlock(&mt_mutex);
551}
552
553static void tracer_wait_unblock(struct tracer *tp)
554{
555 pthread_mutex_lock(&mt_mutex);
556 while (!tp->is_done && !tracers_run)
557 pthread_cond_wait(&mt_cond, &mt_mutex);
558 pthread_mutex_unlock(&mt_mutex);
559}
560
561static void tracer_signal_ready(struct tracer *tp,
562 enum thread_status th_status,
563 int status)
564{
565 pthread_mutex_lock(&mt_mutex);
566 tp->status = status;
567
568 if (th_status == Th_running)
569 nthreads_running++;
570 else if (th_status == Th_error)
571 nthreads_error++;
572 else
573 nthreads_leaving++;
574
575 pthread_cond_signal(&mt_cond);
576 pthread_mutex_unlock(&mt_mutex);
577}
578
579static void wait_tracers_ready(int ncpus_started)
580{
581 pthread_mutex_lock(&mt_mutex);
582 while ((nthreads_running + nthreads_error) < ncpus_started)
583 t_pthread_cond_wait(&mt_cond, &mt_mutex);
584 pthread_mutex_unlock(&mt_mutex);
585}
586
587static void wait_tracers_leaving(void)
588{
589 pthread_mutex_lock(&mt_mutex);
590 while (nthreads_leaving < nthreads_running)
591 t_pthread_cond_wait(&mt_cond, &mt_mutex);
592 pthread_mutex_unlock(&mt_mutex);
593}
594
3fe0b570
AB
595static void init_mmap_info(struct mmap_info *mip)
596{
597 mip->buf_size = buf_size;
598 mip->buf_nr = buf_nr;
599 mip->pagesize = pagesize;
600}
b7106311 601
3fe0b570
AB
/*
 * Shut down and close the socket *fd, then mark it closed by storing -1.
 */
static void net_close_connection(int *fd)
{
	const int sock = *fd;

	shutdown(sock, SHUT_RDWR);
	close(sock);
	*fd = -1;
}
ff11d54c 608
3fe0b570
AB
609static void dpp_free(struct devpath *dpp)
610{
611 if (dpp->stats)
612 free(dpp->stats);
613 if (dpp->ios)
614 free(dpp->ios);
615 if (dpp->path)
616 free(dpp->path);
617 if (dpp->buts_name)
618 free(dpp->buts_name);
619 free(dpp);
620}
d0ca268b 621
3fe0b570
AB
/*
 * Restrict the caller to run on 'cpu' only.
 *
 * Returns 0 on success, or the errno left by sched_setaffinity().
 */
static int lock_on_cpu(int cpu)
{
	cpu_set_t only_this_cpu;

	CPU_ZERO(&only_this_cpu);
	CPU_SET(cpu, &only_this_cpu);

	return sched_setaffinity(0, sizeof(only_this_cpu),
				 &only_this_cpu) < 0 ? errno : 0;
}
e7c9f3ff 633
3fe0b570
AB
/*
 * Try to raise the soft limit of 'resource' by 'increase'; when the new
 * soft limit reaches the hard limit, the hard limit is raised as well.
 *
 * Returns 1 if the limit was changed. Otherwise returns 0 with errno
 * restored to the value it had on entry.
 */
static int increase_limit(int resource, rlim_t increase)
{
	const int entry_errno = errno;
	struct rlimit lim;

	if (getrlimit(resource, &lim) == 0) {
		lim.rlim_cur += increase;
		if (lim.rlim_max <= lim.rlim_cur)
			lim.rlim_max = lim.rlim_cur + increase;

		if (setrlimit(resource, &lim) == 0)
			return 1;
	}

	errno = entry_errno;
	return 0;
}
e0a1988b 651
3fe0b570
AB
/*
 * React to a failed descriptor-creating call: when errno says we ran out
 * of files, try to raise RLIMIT_NOFILE by 16.
 *
 * Returns non-zero when the limit was raised (the caller may retry).
 */
static int handle_open_failure(void)
{
	switch (errno) {
	case ENFILE:
	case EMFILE:
		return increase_limit(RLIMIT_NOFILE, 16);
	default:
		return 0;
	}
}
99c1f5ab 658
3fe0b570
AB
/*
 * React to a failed memory mapping/locking call of 'length' bytes:
 * ENFILE is handed to handle_open_failure(), ENOMEM tries to raise
 * RLIMIT_MEMLOCK by twice the length.
 *
 * Returns non-zero when a limit was raised (the caller may retry).
 */
static int handle_mem_failure(size_t length)
{
	switch (errno) {
	case ENFILE:
		return handle_open_failure();
	case ENOMEM:
		return increase_limit(RLIMIT_MEMLOCK, 2 * length);
	default:
		return 0;
	}
}
99c1f5ab 667
3fe0b570
AB
/*
 * fopen() that retries for as long as handle_open_failure() manages to
 * raise the open-file limit. Returns the stream, or NULL on failure.
 */
static FILE *my_fopen(const char *path, const char *mode)
{
	for (;;) {
		FILE *stream = fopen(path, mode);

		if (stream != NULL || !handle_open_failure())
			return stream;
	}
}
8e86c98a 678
3fe0b570
AB
/*
 * open() that retries for as long as handle_open_failure() manages to
 * raise the open-file limit. Returns the descriptor, or < 0 on failure.
 */
static int my_open(const char *path, int flags)
{
	for (;;) {
		int desc = open(path, flags);

		if (desc >= 0 || !handle_open_failure())
			return desc;
	}
}
ff11d54c 689
3fe0b570
AB
/*
 * socket() that retries for as long as handle_open_failure() manages to
 * raise the open-file limit. Returns the descriptor, or < 0 on failure.
 */
static int my_socket(int domain, int type, int protocol)
{
	for (;;) {
		int desc = socket(domain, type, protocol);

		if (desc >= 0 || !handle_open_failure())
			return desc;
	}
}
700
d5302b03
AB
/*
 * accept() that retries for as long as handle_open_failure() manages to
 * raise the open-file limit. Returns the descriptor, or < 0 on failure.
 */
static int my_accept(int sockfd, struct sockaddr *addr, socklen_t *addrlen)
{
	for (;;) {
		int desc = accept(sockfd, addr, addrlen);

		if (desc >= 0 || !handle_open_failure())
			return desc;
	}
}
711
3fe0b570
AB
/*
 * mmap() that retries for as long as handle_mem_failure() manages to
 * raise the relevant limit. Returns the mapping, or MAP_FAILED.
 */
static void *my_mmap(void *addr, size_t length, int prot, int flags, int fd,
		     off_t offset)
{
	for (;;) {
		void *mapping = mmap(addr, length, prot, flags, fd, offset);

		if (mapping != MAP_FAILED || !handle_mem_failure(length))
			return mapping;
	}
}
723
/*
 * mlock() that retries for as long as handle_mem_failure() manages to
 * raise the relevant limit. Returns the final mlock() result.
 */
static int my_mlock(const void *addr, size_t len)
{
	for (;;) {
		int rc = mlock(addr, len);

		if (rc >= 0 || !handle_mem_failure(len))
			return rc;
	}
}
734
055cc3e5
AB
735static int setup_mmap(int fd, unsigned int maxlen, struct mmap_info *mip)
736{
737 if (mip->fs_off + maxlen > mip->fs_buf_len) {
738 unsigned long nr = max(16, mip->buf_nr);
739
740 if (mip->fs_buf) {
741 munlock(mip->fs_buf, mip->fs_buf_len);
742 munmap(mip->fs_buf, mip->fs_buf_len);
743 mip->fs_buf = NULL;
744 }
745
746 mip->fs_off = mip->fs_size & (mip->pagesize - 1);
747 mip->fs_buf_len = (nr * mip->buf_size) - mip->fs_off;
748 mip->fs_max_size += mip->fs_buf_len;
749
750 if (ftruncate(fd, mip->fs_max_size) < 0) {
751 perror("setup_mmap: ftruncate");
752 return 1;
753 }
754
755 mip->fs_buf = my_mmap(NULL, mip->fs_buf_len, PROT_WRITE,
756 MAP_SHARED, fd,
757 mip->fs_size - mip->fs_off);
758 if (mip->fs_buf == MAP_FAILED) {
759 perror("setup_mmap: mmap");
760 return 1;
761 }
762 my_mlock(mip->fs_buf, mip->fs_buf_len);
763 }
764
765 return 0;
766}
767
3fe0b570
AB
768static int __stop_trace(int fd)
769{
770 /*
771 * Should be stopped, don't complain if it isn't
772 */
773 ioctl(fd, BLKTRACESTOP);
774 return ioctl(fd, BLKTRACETEARDOWN);
775}
776
777static int write_data(char *buf, int len)
778{
779 int ret;
780
781rewrite:
782 ret = fwrite(buf, len, 1, pfp);
783 if (ferror(pfp) || ret != 1) {
784 if (errno == EINTR) {
785 clearerr(pfp);
786 goto rewrite;
787 }
788
789 if (!piped_output || (errno != EPIPE && errno != EBADF)) {
790 fprintf(stderr, "write(%d) failed: %d/%s\n",
791 len, errno, strerror(errno));
69dd57c2 792 }
3fe0b570 793 goto err;
69dd57c2
AB
794 }
795
3fe0b570 796 fflush(pfp);
69dd57c2 797 return 0;
3fe0b570
AB
798
799err:
800 clearerr(pfp);
801 return 1;
69dd57c2
AB
802}
803
804/*
3fe0b570 805 * Returns the number of bytes read (successfully)
69dd57c2 806 */
3fe0b570 807static int __net_recv_data(int fd, void *buf, unsigned int len)
69dd57c2 808{
3fe0b570
AB
809 unsigned int bytes_left = len;
810
811 while (bytes_left && !done) {
812 int ret = recv(fd, buf, bytes_left, MSG_WAITALL);
813
814 if (ret == 0)
815 break;
816 else if (ret < 0) {
5d65b5e6
AB
817 if (errno == EAGAIN) {
818 usleep(50);
819 continue;
820 }
821 perror("server: net_recv_data: recv failed");
055cc3e5 822 break;
3fe0b570
AB
823 } else {
824 buf += ret;
825 bytes_left -= ret;
826 }
827 }
69dd57c2 828
3fe0b570 829 return len - bytes_left;
69dd57c2
AB
830}
831
/*
 * Thin wrapper around __net_recv_data(); returns the number of bytes
 * read.
 */
static int net_recv_data(int fd, void *buf, unsigned int len)
{
	int nread = __net_recv_data(fd, buf, len);

	return nread;
}
7035d92d 836
3fe0b570
AB
/*
 * Send 'buf_len' bytes from 'buf' over 'fd', looping over short sends.
 *
 * A send() interrupted by a signal (EINTR) is retried instead of being
 * treated as fatal; any other error is reported with perror() and ends
 * the transfer.
 *
 * Returns number of bytes written.
 */
static int net_send_data(int fd, void *buf, unsigned int buf_len)
{
	const char *cursor = buf;
	unsigned int bytes_left = buf_len;

	while (bytes_left) {
		ssize_t ret = send(fd, cursor, bytes_left, 0);

		if (ret < 0) {
			if (errno == EINTR)
				continue;
			perror("send");
			break;
		}

		cursor += ret;
		bytes_left -= ret;
	}

	return buf_len - bytes_left;
}
858
3fe0b570 859static int net_send_header(int fd, int cpu, char *buts_name, int len)
eb3c8108 860{
3fe0b570 861 struct blktrace_net_hdr hdr;
eb3c8108 862
3fe0b570 863 memset(&hdr, 0, sizeof(hdr));
eb3c8108 864
3fe0b570
AB
865 hdr.magic = BLK_IO_TRACE_MAGIC;
866 strncpy(hdr.buts_name, buts_name, sizeof(hdr.buts_name));
867 hdr.buts_name[sizeof(hdr.buts_name)-1] = '\0';
868 hdr.cpu = cpu;
869 hdr.max_cpus = ncpus;
870 hdr.len = len;
871 hdr.cl_id = getpid();
872 hdr.buf_size = buf_size;
873 hdr.buf_nr = buf_nr;
874 hdr.page_size = pagesize;
eb3c8108 875
3fe0b570
AB
876 return net_send_data(fd, &hdr, sizeof(hdr)) != sizeof(hdr);
877}
eb3c8108 878
3fe0b570
AB
879static void net_send_open_close(int fd, int cpu, char *buts_name, int len)
880{
881 struct blktrace_net_hdr ret_hdr;
eb3c8108 882
3fe0b570
AB
883 net_send_header(fd, cpu, buts_name, len);
884 net_recv_data(fd, &ret_hdr, sizeof(ret_hdr));
885}
eb3c8108 886
3fe0b570
AB
/*
 * Announce an "open" (len code 0) for 'cpu'/'buts_name' on 'fd'.
 */
static void net_send_open(int fd, int cpu, char *buts_name)
{
	const int open_code = 0;

	net_send_open_close(fd, cpu, buts_name, open_code);
}
891
3fe0b570 892static void net_send_close(int fd, char *buts_name, int drops)
d0ca268b 893{
3fe0b570
AB
894 /*
895 * Overload CPU w/ number of drops
896 *
897 * XXX: Need to clear/set done around call - done=1 (which
898 * is true here) stops reads from happening... :-(
899 */
900 done = 0;
901 net_send_open_close(fd, drops, buts_name, 1);
902 done = 1;
903}
d0ca268b 904
3fe0b570
AB
/*
 * Send a header with cpu 0 and len code 2 for 'buts_name' on 'fd'.
 */
static void ack_open_close(int fd, char *buts_name)
{
	const int ack_code = 2;

	net_send_header(fd, 0, buts_name, ack_code);
}
d0ca268b 909
3fe0b570
AB
910static void net_send_drops(int fd)
911{
912 struct list_head *p;
ed71a31e 913
3fe0b570
AB
914 __list_for_each(p, &devpaths) {
915 struct devpath *dpp = list_entry(p, struct devpath, head);
916
917 net_send_close(fd, dpp->buts_name, dpp->drops);
d0ca268b 918 }
3fe0b570 919}
d0ca268b 920
3fe0b570
AB
921/*
922 * Returns:
055cc3e5
AB
923 * 0: "EOF"
924 * 1: OK
925 * -1: Error
3fe0b570
AB
926 */
927static int net_get_header(struct cl_conn *nc, struct blktrace_net_hdr *bnh)
928{
929 int bytes_read;
930 int fl = fcntl(nc->fd, F_GETFL);
931
932 fcntl(nc->fd, F_SETFL, fl | O_NONBLOCK);
933 bytes_read = __net_recv_data(nc->fd, bnh, sizeof(*bnh));
934 fcntl(nc->fd, F_SETFL, fl & ~O_NONBLOCK);
935
936 if (bytes_read == sizeof(*bnh))
937 return 1;
938 else if (bytes_read == 0)
939 return 0;
055cc3e5
AB
940 else
941 return -1;
d0ca268b
JA
942}
943
e58f3937 944static int net_setup_addr(void)
d0ca268b 945{
e58f3937 946 struct sockaddr_in *addr = &hostname_addr;
cf9208ea 947
e58f3937
AB
948 memset(addr, 0, sizeof(*addr));
949 addr->sin_family = AF_INET;
950 addr->sin_port = htons(net_port);
3fe0b570 951
e58f3937
AB
952 if (inet_aton(hostname, &addr->sin_addr) != 1) {
953 struct hostent *hent;
954retry:
955 hent = gethostbyname(hostname);
3fe0b570 956 if (!hent) {
e58f3937
AB
957 if (h_errno == TRY_AGAIN) {
958 usleep(100);
959 goto retry;
960 } else if (h_errno == NO_RECOVERY) {
961 fprintf(stderr, "gethostbyname(%s)"
962 "non-recoverable error encountered\n",
963 hostname);
964 } else {
965 /*
966 * HOST_NOT_FOUND, NO_ADDRESS or NO_DATA
967 */
968 fprintf(stderr, "Host %s not found\n",
969 hostname);
970 }
3fe0b570
AB
971 return 1;
972 }
973
e58f3937 974 memcpy(&addr->sin_addr, hent->h_addr, 4);
3fe0b570
AB
975 strcpy(hostname, hent->h_name);
976 }
7035d92d 977
e58f3937
AB
978 return 0;
979}
980
981static int net_setup_client(void)
982{
983 int fd;
984 struct sockaddr_in *addr = &hostname_addr;
985
3fe0b570
AB
986 fd = my_socket(AF_INET, SOCK_STREAM, 0);
987 if (fd < 0) {
988 perror("client: socket");
989 return -1;
990 }
cf9208ea 991
e58f3937 992 if (connect(fd, (struct sockaddr *)addr, sizeof(*addr)) < 0) {
3fe0b570
AB
993 if (errno == ECONNREFUSED)
994 fprintf(stderr,
995 "\nclient: Connection to %s refused, "
996 "perhaps the server is not started?\n\n",
997 hostname);
998 else
999 perror("client: connect");
055cc3e5 1000
3fe0b570
AB
1001 close(fd);
1002 return -1;
707b0914 1003 }
3fe0b570
AB
1004
1005 return fd;
d0ca268b
JA
1006}
1007
3fe0b570 1008static int open_client_connections(void)
e7c9f3ff 1009{
3fe0b570 1010 int cpu;
e7c9f3ff 1011
3fe0b570
AB
1012 cl_fds = calloc(ncpus, sizeof(*cl_fds));
1013 for (cpu = 0; cpu < ncpus; cpu++) {
1014 cl_fds[cpu] = net_setup_client();
1015 if (cl_fds[cpu] < 0)
1016 goto err;
eb3c8108 1017 }
3fe0b570
AB
1018 return 0;
1019
1020err:
1021 while (cpu > 0)
1022 close(cl_fds[cpu--]);
1023 free(cl_fds);
1024 return 1;
e7c9f3ff
NS
1025}
1026
3fe0b570 1027static void close_client_connections(void)
eb3c8108 1028{
3fe0b570
AB
1029 if (cl_fds) {
1030 int cpu, *fdp;
eb3c8108 1031
3fe0b570
AB
1032 for (cpu = 0, fdp = cl_fds; cpu < ncpus; cpu++, fdp++) {
1033 if (*fdp >= 0) {
1034 net_send_drops(*fdp);
1035 net_close_connection(fdp);
1036 }
7934e668 1037 }
3fe0b570 1038 free(cl_fds);
ff11d54c 1039 }
eb3c8108
JA
1040}
1041
3fe0b570 1042static void setup_buts(void)
d0ca268b 1043{
3fe0b570 1044 struct list_head *p;
bbabf03a 1045
3fe0b570
AB
1046 __list_for_each(p, &devpaths) {
1047 struct blk_user_trace_setup buts;
1048 struct devpath *dpp = list_entry(p, struct devpath, head);
ae9f71b3 1049
3fe0b570
AB
1050 memset(&buts, 0, sizeof(buts));
1051 buts.buf_size = buf_size;
1052 buts.buf_nr = buf_nr;
1053 buts.act_mask = act_mask;
1054
055cc3e5 1055 if (ioctl(dpp->fd, BLKTRACESETUP, &buts) >= 0) {
3b552a2d
AB
1056 dpp->ncpus = ncpus;
1057 dpp->buts_name = strdup(buts.name);
1058 if (dpp->stats)
1059 free(dpp->stats);
1060 dpp->stats = calloc(dpp->ncpus, sizeof(*dpp->stats));
1061 memset(dpp->stats, 0, dpp->ncpus * sizeof(*dpp->stats));
055cc3e5
AB
1062 } else
1063 fprintf(stderr, "BLKTRACESETUP(2) %s failed: %d/%s\n",
1064 dpp->path, errno, strerror(errno));
3b552a2d
AB
1065 }
1066}
1067
1068static void start_buts(void)
1069{
1070 struct list_head *p;
1071
1072 __list_for_each(p, &devpaths) {
1073 struct devpath *dpp = list_entry(p, struct devpath, head);
1074
1075 if (ioctl(dpp->fd, BLKTRACESTART) < 0) {
3fe0b570
AB
1076 fprintf(stderr, "BLKTRACESTART %s failed: %d/%s\n",
1077 dpp->path, errno, strerror(errno));
bbabf03a 1078 }
3fe0b570 1079 }
8a43bac5
JA
1080}
1081
3fe0b570 1082static int get_drops(struct devpath *dpp)
8e86c98a 1083{
3fe0b570
AB
1084 int fd, drops = 0;
1085 char fn[MAXPATHLEN + 64], tmp[256];
8e86c98a 1086
3fe0b570
AB
1087 snprintf(fn, sizeof(fn), "%s/block/%s/dropped", debugfs_path,
1088 dpp->buts_name);
8e86c98a 1089
3fe0b570
AB
1090 fd = my_open(fn, O_RDONLY);
1091 if (fd < 0) {
1092 /*
1093 * This may be ok: the kernel may not support
1094 * dropped counts.
1095 */
1096 if (errno != ENOENT)
1097 fprintf(stderr, "Could not open %s: %d/%s\n",
1098 fn, errno, strerror(errno));
1099 return 0;
1100 } else if (read(fd, tmp, sizeof(tmp)) < 0) {
1101 fprintf(stderr, "Could not read %s: %d/%s\n",
1102 fn, errno, strerror(errno));
1103 } else
1104 drops = atoi(tmp);
1105 close(fd);
8e86c98a 1106
3fe0b570 1107 return drops;
8e86c98a
JA
1108}
1109
3fe0b570 1110static void get_all_drops(void)
a3e4d330 1111{
3fe0b570 1112 struct list_head *p;
21f55651 1113
3fe0b570
AB
1114 __list_for_each(p, &devpaths) {
1115 struct devpath *dpp = list_entry(p, struct devpath, head);
055cc3e5 1116
3fe0b570 1117 dpp->drops = get_drops(dpp);
21f55651 1118 }
9db17354 1119}
eb3c8108 1120
3fe0b570 1121static inline struct trace_buf *alloc_trace_buf(int cpu, int bufsize)
9db17354 1122{
3fe0b570 1123 struct trace_buf *tbp;
21f55651 1124
3fe0b570
AB
1125 tbp = malloc(sizeof(*tbp) + bufsize);
1126 INIT_LIST_HEAD(&tbp->head);
1127 tbp->len = 0;
1128 tbp->buf = (void *)(tbp + 1);
1129 tbp->cpu = cpu;
1130 tbp->dpp = NULL; /* Will be set when tbp is added */
21f55651 1131
3fe0b570 1132 return tbp;
a3e4d330
JA
1133}
1134
3fe0b570 1135static void free_tracer_heads(struct devpath *dpp)
b7106311 1136{
3fe0b570
AB
1137 int cpu;
1138 struct tracer_devpath_head *hd;
b7106311 1139
3fe0b570
AB
1140 for (cpu = 0, hd = dpp->heads; cpu < ncpus; cpu++, hd++) {
1141 if (hd->prev)
1142 free(hd->prev);
055cc3e5 1143
3fe0b570
AB
1144 pthread_mutex_destroy(&hd->mutex);
1145 }
1146 free(dpp->heads);
1147}
b7106311 1148
3fe0b570
AB
1149static int setup_tracer_devpaths(void)
1150{
1151 struct list_head *p;
b7106311 1152
3fe0b570
AB
1153 if (net_client_use_send())
1154 if (open_client_connections())
1155 return 1;
b7106311 1156
3fe0b570
AB
1157 __list_for_each(p, &devpaths) {
1158 int cpu;
1159 struct tracer_devpath_head *hd;
1160 struct devpath *dpp = list_entry(p, struct devpath, head);
b7106311 1161
3fe0b570
AB
1162 dpp->heads = calloc(ncpus, sizeof(struct tracer_devpath_head));
1163 for (cpu = 0, hd = dpp->heads; cpu < ncpus; cpu++, hd++) {
1164 INIT_LIST_HEAD(&hd->head);
1165 pthread_mutex_init(&hd->mutex, NULL);
1166 hd->prev = NULL;
1167 }
b7106311
JA
1168 }
1169
3fe0b570 1170 return 0;
b7106311
JA
1171}
1172
3fe0b570
AB
1173static inline void add_trace_buf(struct devpath *dpp, int cpu,
1174 struct trace_buf **tbpp)
18eed2a7 1175{
3fe0b570
AB
1176 struct trace_buf *tbp = *tbpp;
1177 struct tracer_devpath_head *hd = &dpp->heads[cpu];
18eed2a7 1178
3fe0b570 1179 tbp->dpp = dpp;
2f064793 1180
3fe0b570
AB
1181 pthread_mutex_lock(&hd->mutex);
1182 list_add_tail(&tbp->head, &hd->head);
1183 pthread_mutex_unlock(&hd->mutex);
18eed2a7 1184
3fe0b570 1185 *tbpp = alloc_trace_buf(cpu, buf_size);
18eed2a7
JA
1186}
1187
3fe0b570 1188static inline void incr_entries(int entries_handled)
a3e4d330 1189{
3fe0b570
AB
1190 pthread_mutex_lock(&dp_mutex);
1191 if (dp_entries == 0)
1192 pthread_cond_signal(&dp_cond);
1193 dp_entries += entries_handled;
1194 pthread_mutex_unlock(&dp_mutex);
a3e4d330
JA
1195}
1196
055cc3e5
AB
1197static void decr_entries(int handled)
1198{
1199 pthread_mutex_lock(&dp_mutex);
1200 dp_entries -= handled;
1201 pthread_mutex_unlock(&dp_mutex);
1202}
1203
1204static int wait_empty_entries(void)
1205{
1206 pthread_mutex_lock(&dp_mutex);
1207 while (!done && dp_entries == 0)
1208 t_pthread_cond_wait(&dp_cond, &dp_mutex);
1209 pthread_mutex_unlock(&dp_mutex);
1210
1211 return !done;
1212}
1213
3fe0b570 1214static int add_devpath(char *path)
8e86c98a 1215{
3fe0b570
AB
1216 int fd;
1217 struct devpath *dpp;
4b747a40 1218 struct list_head *p;
3fe0b570 1219
4b747a40
ES
1220 /*
1221 * Verify device is not duplicated
1222 */
1223 __list_for_each(p, &devpaths) {
1224 struct devpath *tmp = list_entry(p, struct devpath, head);
1225 if (!strcmp(tmp->path, path))
1226 return 0;
1227 }
8e86c98a 1228 /*
3fe0b570 1229 * Verify device is valid before going too far
8e86c98a 1230 */
3fe0b570
AB
1231 fd = my_open(path, O_RDONLY | O_NONBLOCK);
1232 if (fd < 0) {
1233 fprintf(stderr, "Invalid path %s specified: %d/%s\n",
1234 path, errno, strerror(errno));
1235 return 1;
1236 }
8e86c98a 1237
3fe0b570
AB
1238 dpp = malloc(sizeof(*dpp));
1239 memset(dpp, 0, sizeof(*dpp));
1240 dpp->path = strdup(path);
1241 dpp->fd = fd;
1242 dpp->idx = ndevs++;
1243 list_add_tail(&dpp->head, &devpaths);
8e86c98a 1244
3fe0b570 1245 return 0;
8e86c98a
JA
1246}
1247
3fe0b570 1248static void rel_devpaths(void)
a3e4d330 1249{
3fe0b570 1250 struct list_head *p, *q;
a3e4d330 1251
3fe0b570
AB
1252 list_for_each_safe(p, q, &devpaths) {
1253 struct devpath *dpp = list_entry(p, struct devpath, head);
a3e4d330 1254
3fe0b570
AB
1255 list_del(&dpp->head);
1256 __stop_trace(dpp->fd);
1257 close(dpp->fd);
a3e4d330 1258
3fe0b570
AB
1259 if (dpp->heads)
1260 free_tracer_heads(dpp);
a3e4d330 1261
3fe0b570
AB
1262 dpp_free(dpp);
1263 ndevs--;
b7106311 1264 }
8e86c98a 1265}
b7106311 1266
3fe0b570 1267static int flush_subbuf_net(struct trace_buf *tbp)
8e86c98a 1268{
3fe0b570
AB
1269 int fd = cl_fds[tbp->cpu];
1270 struct devpath *dpp = tbp->dpp;
b7106311 1271
3fe0b570
AB
1272 if (net_send_header(fd, tbp->cpu, dpp->buts_name, tbp->len))
1273 return 1;
055cc3e5 1274 else if (net_send_data(fd, tbp->buf, tbp->len) != tbp->len)
3fe0b570 1275 return 1;
a3e4d330 1276
8e86c98a 1277 return 0;
a3e4d330
JA
1278}
1279
3fe0b570
AB
1280static int
1281handle_list_net(__attribute__((__unused__))struct tracer_devpath_head *hd,
1282 struct list_head *list)
8e86c98a 1283{
3fe0b570
AB
1284 struct trace_buf *tbp;
1285 struct list_head *p, *q;
1286 int entries_handled = 0;
8e86c98a 1287
3fe0b570
AB
1288 list_for_each_safe(p, q, list) {
1289 tbp = list_entry(p, struct trace_buf, head);
8e86c98a 1290
3fe0b570
AB
1291 list_del(&tbp->head);
1292 entries_handled++;
6a752c90 1293
3fe0b570
AB
1294 if (cl_fds[tbp->cpu] >= 0) {
1295 if (flush_subbuf_net(tbp)) {
1296 close(cl_fds[tbp->cpu]);
1297 cl_fds[tbp->cpu] = -1;
1298 }
1299 }
7ab2f837 1300
3fe0b570 1301 free(tbp);
7934e668
JA
1302 }
1303
3fe0b570 1304 return entries_handled;
6a752c90
JA
1305}
1306
055cc3e5
AB
1307/*
1308 * Tack 'tbp's buf onto the tail of 'prev's buf
1309 */
1310static struct trace_buf *tb_combine(struct trace_buf *prev,
1311 struct trace_buf *tbp)
1312{
1313 unsigned long tot_len;
1314
1315 tot_len = prev->len + tbp->len;
1316 if (tot_len > buf_size) {
1317 /*
1318 * tbp->head isn't connected (it was 'prev'
1319 * so it had been taken off of the list
1320 * before). Therefore, we can realloc
1321 * the whole structures, as the other fields
1322 * are "static".
1323 */
1324 prev = realloc(prev->buf, sizeof(*prev) + tot_len);
1325 prev->buf = (void *)(prev + 1);
1326 }
1327
1328 memcpy(prev->buf + prev->len, tbp->buf, tbp->len);
1329 prev->len = tot_len;
1330
1331 free(tbp);
1332 return prev;
1333}
1334
3fe0b570
AB
1335static int handle_list_file(struct tracer_devpath_head *hd,
1336 struct list_head *list)
f6fead25 1337{
3fe0b570
AB
1338 int off, t_len, nevents;
1339 struct blk_io_trace *t;
1340 struct list_head *p, *q;
1341 int entries_handled = 0;
1342 struct trace_buf *tbp, *prev;
11629347 1343
3fe0b570
AB
1344 prev = hd->prev;
1345 list_for_each_safe(p, q, list) {
1346 tbp = list_entry(p, struct trace_buf, head);
1347 list_del(&tbp->head);
1348 entries_handled++;
18eed2a7 1349
3fe0b570
AB
1350 /*
1351 * If there was some leftover before, tack this new
1352 * entry onto the tail of the previous one.
1353 */
055cc3e5
AB
1354 if (prev)
1355 tbp = tb_combine(prev, tbp);
ff11d54c 1356
3fe0b570
AB
1357 /*
1358 * See how many whole traces there are - send them
1359 * all out in one go.
1360 */
1361 off = 0;
1362 nevents = 0;
1363 while (off + (int)sizeof(*t) <= tbp->len) {
1364 t = (struct blk_io_trace *)(tbp->buf + off);
1365 t_len = sizeof(*t) + t->pdu_len;
1366 if (off + t_len > tbp->len)
1367 break;
ff11d54c 1368
3fe0b570
AB
1369 off += t_len;
1370 nevents++;
1371 }
1372 if (nevents)
1373 pdc_nev_update(tbp->dpp, tbp->cpu, nevents);
4aeec019 1374
3fe0b570
AB
1375 /*
1376 * Write any full set of traces, any remaining data is kept
1377 * for the next pass.
1378 */
1379 if (off) {
055cc3e5 1380 if (write_data(tbp->buf, off) || off == tbp->len) {
3fe0b570 1381 free(tbp);
055cc3e5
AB
1382 prev = NULL;
1383 }
3fe0b570
AB
1384 else {
1385 /*
1386 * Move valid data to beginning of buffer
1387 */
1388 tbp->len -= off;
1389 memmove(tbp->buf, tbp->buf + off, tbp->len);
1390 prev = tbp;
1391 }
1392 } else
1393 prev = tbp;
ff11d54c 1394 }
3fe0b570 1395 hd->prev = prev;
ff11d54c 1396
3fe0b570 1397 return entries_handled;
ff11d54c
TZ
1398}
1399
3fe0b570 1400static void __process_trace_bufs(void)
8a43bac5 1401{
3fe0b570
AB
1402 int cpu;
1403 struct list_head *p;
1404 struct list_head list;
1405 int handled = 0;
1406
1407 __list_for_each(p, &devpaths) {
1408 struct devpath *dpp = list_entry(p, struct devpath, head);
1409 struct tracer_devpath_head *hd = dpp->heads;
1410
1411 for (cpu = 0; cpu < ncpus; cpu++, hd++) {
1412 pthread_mutex_lock(&hd->mutex);
1413 if (list_empty(&hd->head)) {
1414 pthread_mutex_unlock(&hd->mutex);
1415 continue;
1416 }
8a43bac5 1417
3fe0b570
AB
1418 list_replace_init(&hd->head, &list);
1419 pthread_mutex_unlock(&hd->mutex);
6480258a 1420
3fe0b570
AB
1421 handled += handle_list(hd, &list);
1422 }
d0ca268b
JA
1423 }
1424
055cc3e5
AB
1425 if (handled)
1426 decr_entries(handled);
8a43bac5
JA
1427}
1428
/*
 * Keep processing queued buffers for as long as wait_empty_entries()
 * reports that tracing is still running.
 */
static void process_trace_bufs(void)
{
	for (;;) {
		if (!wait_empty_entries())
			break;
		__process_trace_bufs();
	}
}
3a9d6c13 1434
3fe0b570
AB
1435static void clean_trace_bufs(void)
1436{
3a9d6c13 1437 /*
3fe0b570
AB
1438 * No mutex needed here: we're only reading from the lists,
1439 * tracers are done
3a9d6c13 1440 */
3fe0b570
AB
1441 while (dp_entries)
1442 __process_trace_bufs();
1443}
4b5db44a 1444
3fe0b570
AB
/*
 * Report a failed read of 'ifn' by the tracer on 'cpu', based on the
 * current errno. EAGAIN is not reported.
 */
static inline void read_err(int cpu, char *ifn)
{
	if (errno == EAGAIN)
		return;

	fprintf(stderr, "Thread %d failed read of %s: %d/%s\n",
		cpu, ifn, errno, strerror(errno));
}
1451
3fe0b570 1452static int net_sendfile(struct io_info *iop)
d5396421 1453{
3fe0b570 1454 int ret;
d5396421 1455
3fe0b570
AB
1456 ret = sendfile(iop->ofd, iop->ifd, NULL, iop->ready);
1457 if (ret < 0) {
1458 perror("sendfile");
1459 return 1;
1460 } else if (ret < (int)iop->ready) {
1461 fprintf(stderr, "short sendfile send (%d of %d)\n",
1462 ret, iop->ready);
1463 return 1;
1464 }
91816d54 1465
9db17354 1466 return 0;
91816d54
JA
1467}
1468
3fe0b570 1469static inline int net_sendfile_data(struct tracer *tp, struct io_info *iop)
d0ca268b 1470{
3fe0b570 1471 struct devpath *dpp = iop->dpp;
d0ca268b 1472
3fe0b570
AB
1473 if (net_send_header(iop->ofd, tp->cpu, dpp->buts_name, iop->ready))
1474 return 1;
1475 return net_sendfile(iop);
1476}
d0ca268b 1477
3fe0b570 1478static int fill_ofname(struct io_info *iop, int cpu)
8e86c98a 1479{
3fe0b570 1480 int len;
e3bf54d8 1481 struct stat sb;
3fe0b570 1482 char *dst = iop->ofn;
8e86c98a
JA
1483
1484 if (output_dir)
3fe0b570 1485 len = snprintf(iop->ofn, sizeof(iop->ofn), "%s/", output_dir);
dd870ef6 1486 else
3fe0b570 1487 len = snprintf(iop->ofn, sizeof(iop->ofn), "./");
8e86c98a 1488
e3bf54d8 1489 if (net_mode == Net_server) {
3fe0b570 1490 struct cl_conn *nc = iop->nc;
e0a1988b 1491
3fe0b570
AB
1492 len += sprintf(dst + len, "%s-", nc->ch->hostname);
1493 len += strftime(dst + len, 64, "%F-%T/",
1494 gmtime(&iop->dpp->cl_connect_time));
e3bf54d8
JA
1495 }
1496
3fe0b570 1497 if (stat(iop->ofn, &sb) < 0) {
e3bf54d8 1498 if (errno != ENOENT) {
3fe0b570
AB
1499 fprintf(stderr,
1500 "Destination dir %s stat failed: %d/%s\n",
1501 iop->ofn, errno, strerror(errno));
e3bf54d8
JA
1502 return 1;
1503 }
60886290
JM
1504 /*
1505 * There is no synchronization between multiple threads
1506 * trying to create the directory at once. It's harmless
1507 * to let them try, so just detect the problem and move on.
1508 */
1509 if (mkdir(iop->ofn, 0755) < 0 && errno != EEXIST) {
3fe0b570
AB
1510 fprintf(stderr,
1511 "Destination dir %s can't be made: %d/%s\n",
1512 iop->ofn, errno, strerror(errno));
e3bf54d8
JA
1513 return 1;
1514 }
1515 }
1516
8e86c98a 1517 if (output_name)
3fe0b570
AB
1518 snprintf(iop->ofn + len, sizeof(iop->ofn), "%s.blktrace.%d",
1519 output_name, cpu);
8e86c98a 1520 else
3fe0b570
AB
1521 snprintf(iop->ofn + len, sizeof(iop->ofn), "%s.blktrace.%d",
1522 iop->dpp->buts_name, cpu);
e3bf54d8
JA
1523
1524 return 0;
8e86c98a
JA
1525}
1526
3fe0b570 1527static int set_vbuf(struct io_info *iop, int mode, size_t size)
0cc7d25e 1528{
3fe0b570
AB
1529 iop->obuf = malloc(size);
1530 if (setvbuf(iop->ofp, iop->obuf, mode, size) < 0) {
1531 fprintf(stderr, "setvbuf(%s, %d) failed: %d/%s\n",
1532 iop->dpp->path, (int)size, errno,
1533 strerror(errno));
1534 free(iop->obuf);
ddf22842
JA
1535 return 1;
1536 }
d5396421 1537
ddf22842
JA
1538 return 0;
1539}
007c233c 1540
3fe0b570 1541static int iop_open(struct io_info *iop, int cpu)
ddf22842 1542{
3fe0b570
AB
1543 iop->ofd = -1;
1544 if (fill_ofname(iop, cpu))
1545 return 1;
0cc7d25e 1546
3fe0b570
AB
1547 iop->ofp = my_fopen(iop->ofn, "w+");
1548 if (iop->ofp == NULL) {
1549 fprintf(stderr, "Open output file %s failed: %d/%s\n",
1550 iop->ofn, errno, strerror(errno));
1551 return 1;
1552 }
055cc3e5 1553
3fe0b570
AB
1554 if (set_vbuf(iop, _IOLBF, FILE_VBUF_SIZE)) {
1555 fprintf(stderr, "set_vbuf for file %s failed: %d/%s\n",
1556 iop->ofn, errno, strerror(errno));
1557 fclose(iop->ofp);
1558 return 1;
d0ca268b
JA
1559 }
1560
3fe0b570 1561 iop->ofd = fileno(iop->ofp);
e7c9f3ff 1562 return 0;
d0ca268b
JA
1563}
1564
df81fdb5
AB
1565static void close_iop(struct io_info *iop)
1566{
1567 struct mmap_info *mip = &iop->mmap_info;
1568
1569 if (mip->fs_buf)
1570 munmap(mip->fs_buf, mip->fs_buf_len);
1571
1572 if (!piped_output) {
1573 if (ftruncate(fileno(iop->ofp), mip->fs_size) < 0) {
1574 fprintf(stderr,
1575 "Ignoring err: ftruncate(%s): %d/%s\n",
1576 iop->ofn, errno, strerror(errno));
1577 }
1578 }
1579
1580 if (iop->ofp)
1581 fclose(iop->ofp);
1582 if (iop->obuf)
1583 free(iop->obuf);
1584}
1585
1586static void close_ios(struct tracer *tp)
1587{
1588 while (tp->nios > 0) {
1589 struct io_info *iop = &tp->ios[--tp->nios];
1590
1591 iop->dpp->drops = get_drops(iop->dpp);
1592 if (iop->ifd >= 0)
1593 close(iop->ifd);
1594
1595 if (iop->ofp)
1596 close_iop(iop);
1597 else if (iop->ofd >= 0) {
1598 struct devpath *dpp = iop->dpp;
1599
1600 net_send_close(iop->ofd, dpp->buts_name, dpp->drops);
1601 net_close_connection(&iop->ofd);
1602 }
1603 }
1604
1605 free(tp->ios);
1606 free(tp->pfds);
1607}
1608
3fe0b570 1609static int open_ios(struct tracer *tp)
3aabcd89 1610{
3fe0b570
AB
1611 struct pollfd *pfd;
1612 struct io_info *iop;
1613 struct list_head *p;
1614
1615 tp->ios = calloc(ndevs, sizeof(struct io_info));
3fe0b570 1616 memset(tp->ios, 0, ndevs * sizeof(struct io_info));
055cc3e5
AB
1617
1618 tp->pfds = calloc(ndevs, sizeof(struct pollfd));
3fe0b570
AB
1619 memset(tp->pfds, 0, ndevs * sizeof(struct pollfd));
1620
1621 tp->nios = 0;
1622 iop = tp->ios;
1623 pfd = tp->pfds;
1624 __list_for_each(p, &devpaths) {
1625 struct devpath *dpp = list_entry(p, struct devpath, head);
1626
1627 iop->dpp = dpp;
1628 iop->ofd = -1;
1629 snprintf(iop->ifn, sizeof(iop->ifn), "%s/block/%s/trace%d",
1630 debugfs_path, dpp->buts_name, tp->cpu);
1631
1632 iop->ifd = my_open(iop->ifn, O_RDONLY | O_NONBLOCK);
1633 if (iop->ifd < 0) {
1634 fprintf(stderr, "Thread %d failed open %s: %d/%s\n",
1635 tp->cpu, iop->ifn, errno, strerror(errno));
1636 return 1;
1637 }
1638
1639 init_mmap_info(&iop->mmap_info);
1640
1641 pfd->fd = iop->ifd;
1642 pfd->events = POLLIN;
1643
1644 if (piped_output)
1645 ;
1646 else if (net_client_use_sendfile()) {
1647 iop->ofd = net_setup_client();
1648 if (iop->ofd < 0)
1649 goto err;
1650 net_send_open(iop->ofd, tp->cpu, dpp->buts_name);
1651 } else if (net_mode == Net_none) {
1652 if (iop_open(iop, tp->cpu))
1653 goto err;
1654 } else {
1655 /*
1656 * This ensures that the server knows about all
1657 * connections & devices before _any_ closes
1658 */
1659 net_send_open(cl_fds[tp->cpu], tp->cpu, dpp->buts_name);
1660 }
007c233c 1661
3fe0b570
AB
1662 pfd++;
1663 iop++;
1664 tp->nios++;
9db17354 1665 }
3aabcd89 1666
3fe0b570 1667 return 0;
72ca8801 1668
3fe0b570
AB
1669err:
1670 close(iop->ifd); /* tp->nios _not_ bumped */
df81fdb5 1671 close_ios(tp);
3fe0b570 1672 return 1;
e7c9f3ff
NS
1673}
1674
3fe0b570 1675static int handle_pfds_file(struct tracer *tp, int nevs, int force_read)
e7c9f3ff 1676{
3fe0b570
AB
1677 struct mmap_info *mip;
1678 int i, ret, nentries = 0;
1679 struct pollfd *pfd = tp->pfds;
1680 struct io_info *iop = tp->ios;
1681
1682 for (i = 0; nevs > 0 && i < ndevs; i++, pfd++, iop++) {
1683 if (pfd->revents & POLLIN || force_read) {
1684 mip = &iop->mmap_info;
1685
1686 ret = setup_mmap(iop->ofd, buf_size, mip);
1687 if (ret < 0) {
1688 pfd->events = 0;
1689 break;
1690 }
428683db 1691
3fe0b570
AB
1692 ret = read(iop->ifd, mip->fs_buf + mip->fs_off,
1693 buf_size);
1694 if (ret > 0) {
1695 pdc_dr_update(iop->dpp, tp->cpu, ret);
1696 mip->fs_size += ret;
1697 mip->fs_off += ret;
1698 nentries++;
1699 } else if (ret == 0) {
1700 /*
1701 * Short reads after we're done stop us
1702 * from trying reads.
1703 */
1704 if (tp->is_done)
1705 clear_events(pfd);
1706 } else {
1707 read_err(tp->cpu, iop->ifn);
1708 if (errno != EAGAIN || tp->is_done)
1709 clear_events(pfd);
1710 }
1711 nevs--;
e7c9f3ff 1712 }
e7c9f3ff 1713 }
56070ea4 1714
3fe0b570 1715 return nentries;
e7c9f3ff 1716}
52724a0e 1717
055cc3e5
AB
1718static int handle_pfds_netclient(struct tracer *tp, int nevs, int force_read)
1719{
1720 struct stat sb;
1721 int i, nentries = 0;
1722 struct pdc_stats *sp;
1723 struct pollfd *pfd = tp->pfds;
1724 struct io_info *iop = tp->ios;
1725
1726 for (i = 0; i < ndevs; i++, pfd++, iop++, sp++) {
1727 if (pfd->revents & POLLIN || force_read) {
1728 if (fstat(iop->ifd, &sb) < 0) {
1729 perror(iop->ifn);
1730 pfd->events = 0;
1731 } else if (sb.st_size > (off_t)iop->data_queued) {
1732 iop->ready = sb.st_size - iop->data_queued;
1733 iop->data_queued = sb.st_size;
1734
1735 if (!net_sendfile_data(tp, iop)) {
1736 pdc_dr_update(iop->dpp, tp->cpu,
1737 iop->ready);
1738 nentries++;
1739 } else
1740 clear_events(pfd);
1741 }
1742 if (--nevs == 0)
1743 break;
1744 }
1745 }
1746
1747 if (nentries)
1748 incr_entries(nentries);
1749
1750 return nentries;
1751}
1752
1753static int handle_pfds_entries(struct tracer *tp, int nevs, int force_read)
1754{
1755 int i, nentries = 0;
1756 struct trace_buf *tbp;
1757 struct pollfd *pfd = tp->pfds;
1758 struct io_info *iop = tp->ios;
1759
1760 tbp = alloc_trace_buf(tp->cpu, buf_size);
1761 for (i = 0; i < ndevs; i++, pfd++, iop++) {
1762 if (pfd->revents & POLLIN || force_read) {
1763 tbp->len = read(iop->ifd, tbp->buf, buf_size);
1764 if (tbp->len > 0) {
1765 pdc_dr_update(iop->dpp, tp->cpu, tbp->len);
1766 add_trace_buf(iop->dpp, tp->cpu, &tbp);
1767 nentries++;
1768 } else if (tbp->len == 0) {
1769 /*
1770 * Short reads after we're done stop us
1771 * from trying reads.
1772 */
1773 if (tp->is_done)
1774 clear_events(pfd);
1775 } else {
1776 read_err(tp->cpu, iop->ifn);
1777 if (errno != EAGAIN || tp->is_done)
1778 clear_events(pfd);
1779 }
1780 if (!piped_output && --nevs == 0)
1781 break;
1782 }
1783 }
1784 free(tbp);
1785
1786 if (nentries)
1787 incr_entries(nentries);
1788
1789 return nentries;
1790}
1791
3fe0b570 1792static void *thread_main(void *arg)
8e86c98a 1793{
df81fdb5 1794 int ret, ndone, to_val;
3fe0b570 1795 struct tracer *tp = arg;
8e86c98a 1796
3fe0b570
AB
1797 ret = lock_on_cpu(tp->cpu);
1798 if (ret)
1799 goto err;
ff11d54c 1800
3fe0b570 1801 ret = open_ios(tp);
df81fdb5 1802 if (ret)
3fe0b570 1803 goto err;
6a6d3f0f 1804
3fe0b570
AB
1805 if (piped_output)
1806 to_val = 50; /* Frequent partial handles */
ff11d54c 1807 else
3fe0b570
AB
1808 to_val = 500; /* 1/2 second intervals */
1809
df81fdb5
AB
1810
1811 tracer_signal_ready(tp, Th_running, 0);
1812 tracer_wait_unblock(tp);
6488ca48 1813
3fe0b570
AB
1814 while (!tp->is_done) {
1815 ndone = poll(tp->pfds, ndevs, to_val);
1816 if (ndone || piped_output)
1817 (void)handle_pfds(tp, ndone, piped_output);
1818 else if (ndone < 0 && errno != EINTR)
1819 fprintf(stderr, "Thread %d poll failed: %d/%s\n",
1820 tp->cpu, errno, strerror(errno));
1821 }
22cd0c02
JA
1822
1823 /*
3fe0b570 1824 * Trace is stopped, pull data until we get a short read
22cd0c02 1825 */
3fe0b570
AB
1826 while (handle_pfds(tp, ndevs, 1) > 0)
1827 ;
055cc3e5 1828
3fe0b570 1829 close_ios(tp);
df81fdb5
AB
1830 tracer_signal_ready(tp, Th_leaving, 0);
1831 return NULL;
8e86c98a 1832
3fe0b570 1833err:
df81fdb5 1834 tracer_signal_ready(tp, Th_error, ret);
3fe0b570 1835 return NULL;
22cd0c02
JA
1836}
1837
3fe0b570 1838static int start_tracer(int cpu)
22cd0c02 1839{
3fe0b570 1840 struct tracer *tp;
22cd0c02 1841
3fe0b570
AB
1842 tp = malloc(sizeof(*tp));
1843 memset(tp, 0, sizeof(*tp));
7ab2f837 1844
3fe0b570 1845 INIT_LIST_HEAD(&tp->head);
3fe0b570
AB
1846 tp->status = 0;
1847 tp->cpu = cpu;
8e86c98a 1848
3fe0b570
AB
1849 if (pthread_create(&tp->thread, NULL, thread_main, tp)) {
1850 fprintf(stderr, "FAILED to start thread on CPU %d: %d/%s\n",
1851 cpu, errno, strerror(errno));
df81fdb5
AB
1852 free(tp);
1853 return 1;
8e86c98a 1854 }
3fe0b570 1855
df81fdb5
AB
1856 list_add_tail(&tp->head, &tracers);
1857 return 0;
8e86c98a
JA
1858}
1859
df81fdb5 1860static void start_tracers(void)
e0a1988b 1861{
3fe0b570 1862 int cpu;
df81fdb5 1863 struct list_head *p;
3fe0b570
AB
1864
1865 for (cpu = 0; cpu < ncpus; cpu++)
1866 if (start_tracer(cpu))
1867 break;
e0a1988b 1868
df81fdb5
AB
1869 wait_tracers_ready(cpu);
1870
1871 __list_for_each(p, &tracers) {
1872 struct tracer *tp = list_entry(p, struct tracer, head);
1873 if (tp->status)
1874 fprintf(stderr,
1875 "FAILED to start thread on CPU %d: %d/%s\n",
1876 tp->cpu, tp->status, strerror(tp->status));
1877 }
3fe0b570 1878}
e0a1988b 1879
3fe0b570
AB
1880static void stop_tracers(void)
1881{
1882 struct list_head *p;
e0a1988b
JA
1883
1884 /*
3fe0b570 1885 * Stop the tracing - makes the tracer threads clean up quicker.
e0a1988b 1886 */
3fe0b570
AB
1887 __list_for_each(p, &devpaths) {
1888 struct devpath *dpp = list_entry(p, struct devpath, head);
1889 (void)ioctl(dpp->fd, BLKTRACESTOP);
e0a1988b
JA
1890 }
1891
3fe0b570
AB
1892 /*
1893 * Tell each tracer to quit
1894 */
1895 __list_for_each(p, &tracers) {
1896 struct tracer *tp = list_entry(p, struct tracer, head);
1897 tp->is_done = 1;
1898 }
ff11d54c 1899}
e0a1988b 1900
3fe0b570 1901static void del_tracers(void)
ff11d54c 1902{
3fe0b570 1903 struct list_head *p, *q;
ff11d54c 1904
3fe0b570
AB
1905 list_for_each_safe(p, q, &tracers) {
1906 struct tracer *tp = list_entry(p, struct tracer, head);
ff11d54c 1907
3fe0b570
AB
1908 list_del(&tp->head);
1909 free(tp);
e0a1988b 1910 }
ff11d54c 1911}
e0a1988b 1912
3fe0b570 1913static void wait_tracers(void)
ff11d54c 1914{
3fe0b570 1915 struct list_head *p;
ff11d54c 1916
3fe0b570
AB
1917 if (use_tracer_devpaths())
1918 process_trace_bufs();
1919
df81fdb5
AB
1920 wait_tracers_leaving();
1921
3fe0b570
AB
1922 __list_for_each(p, &tracers) {
1923 int ret;
1924 struct tracer *tp = list_entry(p, struct tracer, head);
1925
3fe0b570
AB
1926 ret = pthread_join(tp->thread, NULL);
1927 if (ret)
1928 fprintf(stderr, "Thread join %d failed %d\n",
1929 tp->cpu, ret);
ff11d54c
TZ
1930 }
1931
3fe0b570
AB
1932 if (use_tracer_devpaths())
1933 clean_trace_bufs();
1934
1935 get_all_drops();
ff11d54c
TZ
1936}
1937
/*
 * Shut tracing down: ignore further termination signals, then stop, wait
 * for and delete the tracers, and release the devices.
 */
static void exit_tracing(void)
{
	static const int ignored[] = { SIGINT, SIGHUP, SIGTERM, SIGALRM };
	size_t i;

	for (i = 0; i < sizeof(ignored) / sizeof(ignored[0]); i++)
		signal(ignored[i], SIG_IGN);

	stop_tracers();
	wait_tracers();
	del_tracers();
	rel_devpaths();
}
1950
3fe0b570 1951static void handle_sigint(__attribute__((__unused__)) int sig)
8e86c98a 1952{
3fe0b570
AB
1953 done = 1;
1954 stop_tracers();
8e86c98a
JA
1955}
1956
3fe0b570 1957static void show_stats(struct list_head *devpaths)
659bcc3f 1958{
3fe0b570
AB
1959 FILE *ofp;
1960 struct list_head *p;
1961 unsigned long long nevents, data_read;
1962 unsigned long long total_drops = 0;
1963 unsigned long long total_events = 0;
1964
1965 if (piped_output)
1966 ofp = my_fopen("/dev/null", "w");
1967 else
1968 ofp = stdout;
ff11d54c 1969
3fe0b570
AB
1970 __list_for_each(p, devpaths) {
1971 int cpu;
1972 struct pdc_stats *sp;
1973 struct devpath *dpp = list_entry(p, struct devpath, head);
e0a1988b 1974
3fe0b570
AB
1975 if (net_mode == Net_server)
1976 printf("server: end of run for %s:%s\n",
1977 dpp->ch->hostname, dpp->buts_name);
e0a1988b 1978
3fe0b570
AB
1979 data_read = 0;
1980 nevents = 0;
1981
1982 fprintf(ofp, "=== %s ===\n", dpp->buts_name);
1983 for (cpu = 0, sp = dpp->stats; cpu < dpp->ncpus; cpu++, sp++) {
1984 /*
1985 * Estimate events if not known...
1986 */
1987 if (sp->nevents == 0) {
1988 sp->nevents = sp->data_read /
1989 sizeof(struct blk_io_trace);
ff11d54c 1990 }
e0a1988b 1991
3fe0b570
AB
1992 fprintf(ofp,
1993 " CPU%3d: %20llu events, %8llu KiB data\n",
1994 cpu, sp->nevents, (sp->data_read + 1023) >> 10);
e0a1988b 1995
3fe0b570
AB
1996 data_read += sp->data_read;
1997 nevents += sp->nevents;
e0a1988b
JA
1998 }
1999
3fe0b570
AB
2000 fprintf(ofp, " Total: %20llu events (dropped %llu),"
2001 " %8llu KiB data\n", nevents,
2002 dpp->drops, (data_read + 1024) >> 10);
8e86c98a 2003
3fe0b570
AB
2004 total_drops += dpp->drops;
2005 total_events += (nevents + dpp->drops);
8e86c98a
JA
2006 }
2007
3fe0b570
AB
2008 fflush(ofp);
2009 if (piped_output)
2010 fclose(ofp);
8e86c98a 2011
3fe0b570
AB
2012 if (total_drops) {
2013 double drops_ratio = 1.0;
8e86c98a 2014
3fe0b570
AB
2015 if (total_events)
2016 drops_ratio = (double)total_drops/(double)total_events;
8e86c98a 2017
3fe0b570
AB
2018 fprintf(stderr, "\nYou have %llu (%5.1lf%%) dropped events\n"
2019 "Consider using a larger buffer size (-b) "
2020 "and/or more buffers (-n)\n",
2021 total_drops, 100.0 * drops_ratio);
8e86c98a 2022 }
8e86c98a
JA
2023}
2024
3fe0b570 2025static int handle_args(int argc, char *argv[])
8e86c98a 2026{
3fe0b570 2027 int c, i;
e3e74029 2028 struct statfs st;
d39c04ca
AB
2029 int act_mask_tmp = 0;
2030
2031 while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) >= 0) {
2032 switch (c) {
2033 case 'a':
2034 i = find_mask_map(optarg);
2035 if (i < 0) {
3fe0b570 2036 fprintf(stderr, "Invalid action mask %s\n",
d39c04ca 2037 optarg);
7425d456 2038 return 1;
d39c04ca
AB
2039 }
2040 act_mask_tmp |= i;
2041 break;
2042
2043 case 'A':
3fe0b570 2044 if ((sscanf(optarg, "%x", &i) != 1) ||
98f8386b 2045 !valid_act_opt(i)) {
d39c04ca 2046 fprintf(stderr,
ab197ca7 2047 "Invalid set action mask %s/0x%x\n",
d39c04ca 2048 optarg, i);
7425d456 2049 return 1;
d39c04ca
AB
2050 }
2051 act_mask_tmp = i;
2052 break;
d0ca268b 2053
d39c04ca 2054 case 'd':
3fe0b570 2055 if (add_devpath(optarg) != 0)
e7c9f3ff 2056 return 1;
d39c04ca
AB
2057 break;
2058
cf1edb17
AB
2059 case 'I': {
2060 char dev_line[256];
3fe0b570 2061 FILE *ifp = my_fopen(optarg, "r");
cf1edb17
AB
2062
2063 if (!ifp) {
3fe0b570
AB
2064 fprintf(stderr,
2065 "Invalid file for devices %s\n",
cf1edb17
AB
2066 optarg);
2067 return 1;
2068 }
2069
2070 while (fscanf(ifp, "%s\n", dev_line) == 1)
3fe0b570 2071 if (add_devpath(dev_line) != 0)
cf1edb17
AB
2072 return 1;
2073 break;
2074 }
cf1edb17 2075
5270dddd 2076 case 'r':
3d06efea 2077 debugfs_path = optarg;
5270dddd
JA
2078 break;
2079
d5396421 2080 case 'o':
66efebf8 2081 output_name = optarg;
d5396421 2082 break;
bc39777c
JA
2083 case 'k':
2084 kill_running_trace = 1;
2085 break;
ece238a6
NS
2086 case 'w':
2087 stop_watch = atoi(optarg);
2088 if (stop_watch <= 0) {
2089 fprintf(stderr,
2090 "Invalid stopwatch value (%d secs)\n",
2091 stop_watch);
2092 return 1;
2093 }
2094 break;
57ea8602 2095 case 'V':
5d4f19d9 2096 case 'v':
52724a0e 2097 printf("%s version %s\n", argv[0], blktrace_version);
3fe0b570
AB
2098 exit(0);
2099 /*NOTREACHED*/
129aa440 2100 case 'b':
eb3c8108 2101 buf_size = strtoul(optarg, NULL, 10);
183a0855 2102 if (buf_size <= 0 || buf_size > 16*1024) {
3fe0b570
AB
2103 fprintf(stderr, "Invalid buffer size (%lu)\n",
2104 buf_size);
129aa440
JA
2105 return 1;
2106 }
2107 buf_size <<= 10;
2108 break;
2109 case 'n':
eb3c8108 2110 buf_nr = strtoul(optarg, NULL, 10);
129aa440
JA
2111 if (buf_nr <= 0) {
2112 fprintf(stderr,
eb3c8108 2113 "Invalid buffer nr (%lu)\n", buf_nr);
129aa440
JA
2114 return 1;
2115 }
2116 break;
d1d7f15f
JA
2117 case 'D':
2118 output_dir = optarg;
2119 break;
8e86c98a
JA
2120 case 'h':
2121 net_mode = Net_client;
2122 strcpy(hostname, optarg);
2123 break;
2124 case 'l':
2125 net_mode = Net_server;
2126 break;
2127 case 'p':
2128 net_port = atoi(optarg);
2129 break;
32f18c48 2130 case 's':
79971f43 2131 net_use_sendfile = 0;
32f18c48 2132 break;
d39c04ca 2133 default:
ee1f4158 2134 show_usage(argv[0]);
3fe0b570
AB
2135 exit(1);
2136 /*NOTREACHED*/
d39c04ca
AB
2137 }
2138 }
2139
3fe0b570
AB
2140 while (optind < argc)
2141 if (add_devpath(argv[optind++]) != 0)
2142 return 1;
8e86c98a 2143
3fe0b570
AB
2144 if (net_mode != Net_server && ndevs == 0) {
2145 show_usage(argv[0]);
2146 return 1;
2147 }
8e86c98a 2148
3fe0b570
AB
2149 if (statfs(debugfs_path, &st) < 0 || st.f_type != (long)DEBUGFS_TYPE) {
2150 fprintf(stderr, "Invalid debug path %s: %d/%s\n",
2151 debugfs_path, errno, strerror(errno));
2152 return 1;
2153 }
2154
2155 if (act_mask_tmp != 0)
2156 act_mask = act_mask_tmp;
2157
e58f3937
AB
2158 if (net_mode == Net_client && net_setup_addr())
2159 return 1;
2160
3fe0b570
AB
2161 /*
2162 * Set up for appropriate PFD handler based upon output name.
2163 */
2164 if (net_client_use_sendfile())
2165 handle_pfds = handle_pfds_netclient;
2166 else if (net_client_use_send())
2167 handle_pfds = handle_pfds_entries;
2168 else if (output_name && (strcmp(output_name, "-") == 0)) {
2169 piped_output = 1;
2170 handle_pfds = handle_pfds_entries;
2171 pfp = stdout;
2172 setvbuf(pfp, NULL, _IONBF, 0);
2173 } else
2174 handle_pfds = handle_pfds_file;
2175 return 0;
2176}
2177
2178static void ch_add_connection(struct net_server_s *ns, struct cl_host *ch,
2179 int fd)
2180{
2181 struct cl_conn *nc;
2182
2183 nc = malloc(sizeof(*nc));
2184 memset(nc, 0, sizeof(*nc));
2185
2186 time(&nc->connect_time);
2187 nc->ch = ch;
2188 nc->fd = fd;
2189 nc->ncpus = -1;
2190
2191 list_add_tail(&nc->ch_head, &ch->conn_list);
2192 ch->connects++;
2193
2194 list_add_tail(&nc->ns_head, &ns->conn_list);
2195 ns->connects++;
2196 ns->pfds = realloc(ns->pfds, (ns->connects+1) * sizeof(struct pollfd));
2197}
2198
2199static void ch_rem_connection(struct net_server_s *ns, struct cl_host *ch,
2200 struct cl_conn *nc)
2201{
2202 net_close_connection(&nc->fd);
2203
2204 list_del(&nc->ch_head);
2205 ch->connects--;
2206
2207 list_del(&nc->ns_head);
2208 ns->connects--;
2209 ns->pfds = realloc(ns->pfds, (ns->connects+1) * sizeof(struct pollfd));
2210
2211 free(nc);
2212}
2213
2214static struct cl_host *net_find_client_host(struct net_server_s *ns,
2215 struct in_addr cl_in_addr)
2216{
2217 struct list_head *p;
2218
2219 __list_for_each(p, &ns->ch_list) {
2220 struct cl_host *ch = list_entry(p, struct cl_host, head);
2221
2222 if (in_addr_eq(ch->cl_in_addr, cl_in_addr))
2223 return ch;
2224 }
2225
2226 return NULL;
2227}
2228
2229static struct cl_host *net_add_client_host(struct net_server_s *ns,
2230 struct sockaddr_in *addr)
2231{
2232 struct cl_host *ch;
2233
2234 ch = malloc(sizeof(*ch));
2235 memset(ch, 0, sizeof(*ch));
2236
2237 ch->ns = ns;
2238 ch->cl_in_addr = addr->sin_addr;
2239 list_add_tail(&ch->head, &ns->ch_list);
2240 ns->nchs++;
ec685dd2 2241
3fe0b570
AB
2242 ch->hostname = strdup(inet_ntoa(addr->sin_addr));
2243 printf("server: connection from %s\n", ch->hostname);
2244
2245 INIT_LIST_HEAD(&ch->conn_list);
2246 INIT_LIST_HEAD(&ch->devpaths);
2247
2248 return ch;
2249}
2250
2251static void device_done(struct devpath *dpp, int ncpus)
2252{
2253 int cpu;
2254 struct io_info *iop;
2255
2256 for (cpu = 0, iop = dpp->ios; cpu < ncpus; cpu++, iop++)
2257 close_iop(iop);
2258
2259 list_del(&dpp->head);
2260 dpp_free(dpp);
2261}
2262
2263static void net_ch_remove(struct cl_host *ch, int ncpus)
2264{
2265 struct list_head *p, *q;
2266 struct net_server_s *ns = ch->ns;
2267
2268 list_for_each_safe(p, q, &ch->devpaths) {
2269 struct devpath *dpp = list_entry(p, struct devpath, head);
2270 device_done(dpp, ncpus);
ec685dd2 2271 }
8e86c98a 2272
3fe0b570
AB
2273 list_for_each_safe(p, q, &ch->conn_list) {
2274 struct cl_conn *nc = list_entry(p, struct cl_conn, ch_head);
2275
2276 ch_rem_connection(ns, ch, nc);
22cd0c02
JA
2277 }
2278
3fe0b570
AB
2279 list_del(&ch->head);
2280 ns->nchs--;
2281
2282 if (ch->hostname)
2283 free(ch->hostname);
2284 free(ch);
2285}
2286
2287static void net_add_connection(struct net_server_s *ns)
2288{
2289 int fd;
2290 struct cl_host *ch;
2291 socklen_t socklen = sizeof(ns->addr);
2292
d5302b03 2293 fd = my_accept(ns->listen_fd, (struct sockaddr *)&ns->addr, &socklen);
3fe0b570
AB
2294 if (fd < 0) {
2295 /*
2296 * This is OK: we just won't accept this connection,
2297 * nothing fatal.
2298 */
2299 perror("accept");
2300 } else {
2301 ch = net_find_client_host(ns, ns->addr.sin_addr);
2302 if (!ch)
2303 ch = net_add_client_host(ns, &ns->addr);
2304
2305 ch_add_connection(ns, ch, fd);
d39c04ca 2306 }
3fe0b570 2307}
d39c04ca 2308
3fe0b570
AB
2309static struct devpath *nc_add_dpp(struct cl_conn *nc,
2310 struct blktrace_net_hdr *bnh,
2311 time_t connect_time)
2312{
2313 int cpu;
2314 struct io_info *iop;
2315 struct devpath *dpp;
2316
2317 dpp = malloc(sizeof(*dpp));
2318 memset(dpp, 0, sizeof(*dpp));
2319
2320 dpp->buts_name = strdup(bnh->buts_name);
2321 dpp->path = strdup(bnh->buts_name);
2322 dpp->fd = -1;
2323 dpp->ch = nc->ch;
2324 dpp->cl_id = bnh->cl_id;
2325 dpp->cl_connect_time = connect_time;
2326 dpp->ncpus = nc->ncpus;
2327 dpp->stats = calloc(dpp->ncpus, sizeof(*dpp->stats));
2328 memset(dpp->stats, 0, dpp->ncpus * sizeof(*dpp->stats));
2329
2330 list_add_tail(&dpp->head, &nc->ch->devpaths);
2331 nc->ch->ndevs++;
2332
2333 dpp->ios = calloc(nc->ncpus, sizeof(*iop));
2334 memset(dpp->ios, 0, ndevs * sizeof(*iop));
2335
2336 for (cpu = 0, iop = dpp->ios; cpu < nc->ncpus; cpu++, iop++) {
2337 iop->dpp = dpp;
2338 iop->nc = nc;
2339 init_mmap_info(&iop->mmap_info);
2340
2341 if (iop_open(iop, cpu))
2342 goto err;
69dd57c2
AB
2343 }
2344
3fe0b570 2345 return dpp;
69dd57c2 2346
3fe0b570
AB
2347err:
2348 /*
2349 * Need to unravel what's been done...
2350 */
2351 while (cpu >= 0)
2352 close_iop(&dpp->ios[cpu--]);
2353 dpp_free(dpp);
2354
2355 return NULL;
2356}
d0ca268b 2357
3fe0b570
AB
2358static struct devpath *nc_find_dpp(struct cl_conn *nc,
2359 struct blktrace_net_hdr *bnh)
2360{
2361 struct list_head *p;
2362 time_t connect_time = nc->connect_time;
3d06efea 2363
3fe0b570
AB
2364 __list_for_each(p, &nc->ch->devpaths) {
2365 struct devpath *dpp = list_entry(p, struct devpath, head);
2366
2367 if (!strcmp(dpp->buts_name, bnh->buts_name))
2368 return dpp;
2369
2370 if (dpp->cl_id == bnh->cl_id)
2371 connect_time = dpp->cl_connect_time;
d0ca268b
JA
2372 }
2373
3fe0b570
AB
2374 return nc_add_dpp(nc, bnh, connect_time);
2375}
bc39777c 2376
3fe0b570
AB
2377static void net_client_read_data(struct cl_conn *nc, struct devpath *dpp,
2378 struct blktrace_net_hdr *bnh)
2379{
2380 int ret;
2381 struct io_info *iop = &dpp->ios[bnh->cpu];
2382 struct mmap_info *mip = &iop->mmap_info;
2383
2384 if (setup_mmap(iop->ofd, bnh->len, &iop->mmap_info)) {
2385 fprintf(stderr, "ncd(%s:%d): mmap failed\n",
2386 nc->ch->hostname, nc->fd);
2387 exit(1);
2388 }
2389
2390 ret = net_recv_data(nc->fd, mip->fs_buf + mip->fs_off, bnh->len);
2391 if (ret > 0) {
2392 pdc_dr_update(dpp, bnh->cpu, ret);
2393 mip->fs_size += ret;
2394 mip->fs_off += ret;
2395 } else if (ret < 0)
2396 exit(1);
2397}
2398
2399/*
2400 * Returns 1 if we closed a host - invalidates other polling information
2401 * that may be present.
2402 */
2403static int net_client_data(struct cl_conn *nc)
2404{
2405 int ret;
2406 struct devpath *dpp;
2407 struct blktrace_net_hdr bnh;
2408
2409 ret = net_get_header(nc, &bnh);
2410 if (ret == 0)
7425d456 2411 return 0;
3fe0b570
AB
2412
2413 if (ret < 0) {
2414 fprintf(stderr, "ncd(%d): header read failed\n", nc->fd);
2415 exit(1);
2416 }
2417
2418 if (data_is_native == -1 && check_data_endianness(bnh.magic)) {
2419 fprintf(stderr, "ncd(%d): received data is bad\n", nc->fd);
2420 exit(1);
2421 }
2422
2423 if (!data_is_native) {
2424 bnh.magic = be32_to_cpu(bnh.magic);
2425 bnh.cpu = be32_to_cpu(bnh.cpu);
2426 bnh.max_cpus = be32_to_cpu(bnh.max_cpus);
2427 bnh.len = be32_to_cpu(bnh.len);
2428 bnh.cl_id = be32_to_cpu(bnh.cl_id);
2429 bnh.buf_size = be32_to_cpu(bnh.buf_size);
2430 bnh.buf_nr = be32_to_cpu(bnh.buf_nr);
2431 bnh.page_size = be32_to_cpu(bnh.page_size);
2432 }
2433
2434 if ((bnh.magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) {
2435 fprintf(stderr, "ncd(%s:%d): bad data magic\n",
2436 nc->ch->hostname, nc->fd);
2437 exit(1);
2438 }
2439
2440 if (nc->ncpus == -1)
2441 nc->ncpus = bnh.max_cpus;
2442
2443 /*
2444 * len == 0 means the other end is sending us a new connection/dpp
2445 * len == 1 means that the other end signalled end-of-run
2446 */
2447 dpp = nc_find_dpp(nc, &bnh);
2448 if (bnh.len == 0) {
2449 /*
2450 * Just adding in the dpp above is enough
2451 */
2452 ack_open_close(nc->fd, dpp->buts_name);
2453 nc->ch->cl_opens++;
2454 } else if (bnh.len == 1) {
2455 /*
2456 * overload cpu count with dropped events
2457 */
2458 dpp->drops = bnh.cpu;
2459
2460 ack_open_close(nc->fd, dpp->buts_name);
2461 if (--nc->ch->cl_opens == 0) {
2462 show_stats(&nc->ch->devpaths);
2463 net_ch_remove(nc->ch, nc->ncpus);
2464 return 1;
2465 }
2466 } else
2467 net_client_read_data(nc, dpp, &bnh);
2468
2469 return 0;
2470}
2471
2472static void handle_client_data(struct net_server_s *ns, int events)
2473{
2474 struct cl_conn *nc;
2475 struct pollfd *pfd;
2476 struct list_head *p, *q;
2477
2478 pfd = &ns->pfds[1];
2479 list_for_each_safe(p, q, &ns->conn_list) {
2480 if (pfd->revents & POLLIN) {
2481 nc = list_entry(p, struct cl_conn, ns_head);
2482
2483 if (net_client_data(nc) || --events == 0)
2484 break;
2485 }
2486 pfd++;
2487 }
2488}
2489
2490static void net_setup_pfds(struct net_server_s *ns)
2491{
2492 struct pollfd *pfd;
2493 struct list_head *p;
2494
2495 ns->pfds[0].fd = ns->listen_fd;
2496 ns->pfds[0].events = POLLIN;
2497
2498 pfd = &ns->pfds[1];
2499 __list_for_each(p, &ns->conn_list) {
2500 struct cl_conn *nc = list_entry(p, struct cl_conn, ns_head);
2501
2502 pfd->fd = nc->fd;
2503 pfd->events = POLLIN;
2504 pfd++;
2505 }
2506}
2507
2508static int net_server_handle_connections(struct net_server_s *ns)
2509{
2510 int events;
2511
2512 printf("server: waiting for connections...\n");
2513
2514 while (!done) {
2515 net_setup_pfds(ns);
2516 events = poll(ns->pfds, ns->connects + 1, -1);
2517 if (events < 0) {
2518 if (errno != EINTR) {
2519 perror("FATAL: poll error");
2520 return 1;
2521 }
2522 } else if (events > 0) {
2523 if (ns->pfds[0].revents & POLLIN) {
2524 net_add_connection(ns);
2525 events--;
2526 }
2527
2528 if (events)
2529 handle_client_data(ns, events);
2530 }
2531 }
2532
2533 return 0;
2534}
2535
2536static int net_server(void)
2537{
2538 int fd, opt;
2539 int ret = 1;
2540 struct net_server_s net_server;
2541 struct net_server_s *ns = &net_server;
2542
2543 memset(ns, 0, sizeof(*ns));
2544 INIT_LIST_HEAD(&ns->ch_list);
2545 INIT_LIST_HEAD(&ns->conn_list);
2546 ns->pfds = malloc(sizeof(struct pollfd));
2547
2548 fd = my_socket(AF_INET, SOCK_STREAM, 0);
2549 if (fd < 0) {
2550 perror("server: socket");
2551 goto out;
2552 }
2553
2554 opt = 1;
2555 if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) < 0) {
2556 perror("setsockopt");
2557 goto out;
2558 }
2559
2560 memset(&ns->addr, 0, sizeof(ns->addr));
2561 ns->addr.sin_family = AF_INET;
2562 ns->addr.sin_addr.s_addr = htonl(INADDR_ANY);
2563 ns->addr.sin_port = htons(net_port);
2564
2565 if (bind(fd, (struct sockaddr *) &ns->addr, sizeof(ns->addr)) < 0) {
2566 perror("bind");
2567 goto out;
2568 }
2569
2570 if (listen(fd, 1) < 0) {
2571 perror("listen");
2572 goto out;
2573 }
2574
2575 /*
2576 * The actual server looping is done here:
2577 */
2578 ns->listen_fd = fd;
2579 ret = net_server_handle_connections(ns);
2580
2581 /*
2582 * Clean up and return...
2583 */
2584out:
2585 free(ns->pfds);
2586 return ret;
2587}
2588
055cc3e5
AB
2589static int run_tracers(void)
2590{
2591 atexit(exit_tracing);
2592 if (net_mode == Net_client)
2593 printf("blktrace: connecting to %s\n", hostname);
2594
2595 setup_buts();
2596
2597 if (use_tracer_devpaths()) {
2598 if (setup_tracer_devpaths())
2599 return 1;
2600
2601 if (piped_output)
2602 handle_list = handle_list_file;
2603 else
2604 handle_list = handle_list_net;
2605 }
2606
2607 start_tracers();
2608 if (nthreads_running == ncpus) {
2609 unblock_tracers();
2610 start_buts();
2611 if (net_mode == Net_client)
2612 printf("blktrace: connected!\n");
2613 if (stop_watch)
2614 alarm(stop_watch);
2615 } else
2616 stop_tracers();
2617
2618 wait_tracers();
2619 if (nthreads_running == ncpus)
2620 show_stats(&devpaths);
2621 if (net_client_use_send())
2622 close_client_connections();
2623 del_tracers();
2624
2625 return 0;
2626}
2627
3fe0b570
AB
2628int main(int argc, char *argv[])
2629{
2630 int ret = 0;
2631
2632 setlocale(LC_NUMERIC, "en_US");
2633 pagesize = getpagesize();
2634 ncpus = sysconf(_SC_NPROCESSORS_ONLN);
2635 if (ncpus < 0) {
2636 fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed %d/%s\n",
2637 errno, strerror(errno));
2638 ret = 1;
2639 goto out;
055cc3e5 2640 } else if (handle_args(argc, argv)) {
3fe0b570
AB
2641 ret = 1;
2642 goto out;
bc39777c
JA
2643 }
2644
ce2151eb
AB
2645 if (ndevs > 1 && output_name && strcmp(output_name, "-") != 0) {
2646 fprintf(stderr, "-o not supported with multiple devices\n");
2647 ret = 1;
2648 goto out;
2649 }
2650
d0ca268b
JA
2651 signal(SIGINT, handle_sigint);
2652 signal(SIGHUP, handle_sigint);
2653 signal(SIGTERM, handle_sigint);
ece238a6 2654 signal(SIGALRM, handle_sigint);
38e1f0c6 2655 signal(SIGPIPE, SIG_IGN);
d0ca268b 2656
3fe0b570
AB
2657 if (kill_running_trace) {
2658 struct devpath *dpp;
2659 struct list_head *p;
8e86c98a 2660
3fe0b570
AB
2661 __list_for_each(p, &devpaths) {
2662 dpp = list_entry(p, struct devpath, head);
2663 if (__stop_trace(dpp->fd)) {
2664 fprintf(stderr,
2665 "BLKTRACETEARDOWN %s failed: %d/%s\n",
2666 dpp->path, errno, strerror(errno));
2667 }
2668 }
2669 } else if (net_mode == Net_server) {
2670 if (output_name) {
2671 fprintf(stderr, "-o ignored in server mode\n");
2672 output_name = NULL;
2673 }
3fe0b570 2674 ret = net_server();
055cc3e5
AB
2675 } else
2676 ret = run_tracers();
d0ca268b 2677
3fe0b570
AB
2678out:
2679 if (pfp)
2680 fclose(pfp);
2681 rel_devpaths();
2682 return ret;
2683}