blktrace: don't stop tracer if trace was not set up successfully
[blktrace.git] / blktrace.c
1/*
2 * block queue tracing application
3 *
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5 * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
6 *
7 * Rewrite to have a single thread per CPU (managing all devices on that CPU)
8 * Alan D. Brunelle <alan.brunelle@hp.com> - January 2009
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 *
24 */
25
26#include <errno.h>
27#include <stdarg.h>
28#include <stdio.h>
29#include <stdlib.h>
30#include <string.h>
31#include <fcntl.h>
32#include <getopt.h>
33#include <sched.h>
34#include <unistd.h>
35#include <poll.h>
36#include <signal.h>
37#include <pthread.h>
38#include <locale.h>
39#include <sys/ioctl.h>
40#include <sys/types.h>
41#include <sys/stat.h>
42#include <sys/vfs.h>
43#include <sys/mman.h>
44#include <sys/param.h>
45#include <sys/time.h>
46#include <sys/resource.h>
47#include <sys/socket.h>
48#include <netinet/in.h>
49#include <arpa/inet.h>
50#include <netdb.h>
51#include <sys/sendfile.h>
52
53#include "btt/list.h"
54#include "blktrace.h"
55
56/*
57 * You may want to increase this even more, if you are logging at a high
58 * rate and see skipped/missed events
59 */
60#define BUF_SIZE (512 * 1024)
61#define BUF_NR (4)
62
63#define FILE_VBUF_SIZE (128 * 1024)
64
65#define DEBUGFS_TYPE (0x64626720)
66#define TRACE_NET_PORT (8462)
67
68enum {
69 Net_none = 0,
70 Net_server,
71 Net_client,
72};
73
74enum thread_status {
75 Th_running,
76 Th_leaving,
77 Th_error
78};
79
80/*
81 * Generic stats collected: nevents can be _roughly_ estimated by data_read
82 * (discounting pdu...)
83 *
84 * These fields are updated w/ pdc_dr_update & pdc_nev_update below.
85 */
86struct pdc_stats {
87 unsigned long long data_read;
88 unsigned long long nevents;
89};
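/*
 * Illustrative sketch only (not part of the original file): when nevents
 * is not maintained it can be approximated from data_read, which is the
 * same calculation show_stats() performs further down; the per-event pdu
 * payload is ignored, so this slightly overestimates the true count.
 */
static inline unsigned long long pdc_estimate_nevents(struct pdc_stats *sp)
{
	return sp->data_read / sizeof(struct blk_io_trace);
}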
90
91struct devpath {
92 struct list_head head;
93 char *path; /* path to device special file */
94 char *buts_name; /* name returned from bt kernel code */
95 struct pdc_stats *stats;
96 int fd, ncpus;
97 unsigned long long drops;
98
99 /*
100 * For piped output only:
101 *
102 * Each tracer will have a tracer_devpath_head that it will add new
103 * data onto. Its list is protected above (tracer_devpath_head.mutex)
104 * and it will signal the processing thread using the dp_cond,
105 * dp_mutex & dp_entries variables above.
106 */
107 struct tracer_devpath_head *heads;
108
109 /*
110 * For network server mode only:
111 */
112 struct cl_host *ch;
113 u32 cl_id;
114 time_t cl_connect_time;
115 int setup_done; /* ioctl BLKTRACESETUP done */
116 struct io_info *ios;
117};
118
119/*
120 * For piped output to stdout we will have each tracer thread (one per dev)
121 * tack buffers read from the relay queues on a per-device list.
122 *
123 * The main thread will then collect trace buffers from each of the lists in turn.
124 *
125 * We will use a mutex to guard each of the trace_buf list. The tracers
126 * can then signal the main thread using <dp_cond,dp_mutex> and
127 * dp_entries. (When dp_entries is 0, and a tracer adds an entry it will
128 * signal. When dp_entries is 0, the main thread will wait for that condition
129 * to be signalled.)
130 *
131 * adb: It may be better just to have a large buffer per tracer per dev,
132 * and then use it as a ring-buffer. This would certainly cut down a lot
133 * of malloc/free thrashing, at the cost of more memory movements (potentially).
134 */
135struct trace_buf {
136 struct list_head head;
137 struct devpath *dpp;
138 void *buf;
139 int cpu, len;
140};
141
142struct tracer_devpath_head {
143 pthread_mutex_t mutex;
144 struct list_head head;
145 struct trace_buf *prev;
146};
147
148/*
149 * Used to handle the mmap() interfaces for output file (containing traces)
150 */
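/*
 * Field usage (see setup_mmap()/handle_pfds_file()): fs_buf/fs_buf_len
 * describe the currently mmap()'ed window of the output file, fs_off is
 * the write offset inside that window, fs_size counts trace bytes written
 * so far, and fs_max_size is the size the file has been ftruncate()'d to.
 */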
151struct mmap_info {
152 void *fs_buf;
153 unsigned long long fs_size, fs_max_size, fs_off, fs_buf_len;
154 unsigned long buf_size, buf_nr;
155 int pagesize;
156};
157
158/*
159 * Each thread doing work on a (client) side of blktrace will have one
160 * of these. The ios array contains input/output information, pfds holds
161 * poll() data. The volatile's provide flags to/from the main executing
162 * thread.
163 */
164struct tracer {
165 struct list_head head;
166 struct io_info *ios;
167 struct pollfd *pfds;
168 pthread_t thread;
169 int cpu, nios;
170 volatile int status, is_done;
171};
172
173/*
174 * networking stuff follows. we include a magic number so we know whether
175 * to endianness convert or not.
176 *
177 * The len field is overloaded:
178 * 0 - Indicates an "open" - allowing the server to set up for a dev/cpu
179 * 1 - Indicates a "close" - Shut down connection orderly
180 *
181 * The cpu field is overloaded on close: it will contain the number of drops.
182 */
183struct blktrace_net_hdr {
184 u32 magic; /* same as trace magic */
185 char buts_name[32]; /* trace name */
186 u32 cpu; /* for which cpu */
187 u32 max_cpus;
188 u32 len; /* length of following trace data */
189 u32 cl_id; /* id for set of client per-cpu connections */
190 u32 buf_size; /* client buf_size for this trace */
191 u32 buf_nr; /* client buf_nr for this trace */
192 u32 page_size; /* client page_size for this trace */
193};
194
195/*
196 * Each host encountered has one of these. The head is used to link this
197 * on to the network server's ch_list. Connections associated with this
198 * host are linked on conn_list, and any devices traced on that host
199 * are connected on the devpaths list.
200 */
201struct cl_host {
202 struct list_head head;
203 struct list_head conn_list;
204 struct list_head devpaths;
205 struct net_server_s *ns;
206 char *hostname;
207 struct in_addr cl_in_addr;
208 int connects, ndevs, cl_opens;
209};
210
211/*
212 * Each connection (client to server socket ('fd')) has one of these. A
213 * back reference to the host ('ch'), and lists headers (for the host
214 * list, and the network server conn_list) are also included.
215 */
216struct cl_conn {
217 struct list_head ch_head, ns_head;
218 struct cl_host *ch;
219 int fd, ncpus;
220 time_t connect_time;
221};
222
223/*
224 * The network server requires some poll structures to be maintained -
225 * one per connection currently on conn_list. The nchs/ch_list values
226 * are for each host connected to this server. The addr field is used
227 * for scratch as new connections are established.
228 */
229struct net_server_s {
230 struct list_head conn_list;
231 struct list_head ch_list;
232 struct pollfd *pfds;
233 int listen_fd, connects, nchs;
234 struct sockaddr_in addr;
235};
236
237/*
238 * This structure is (generically) used to provide information
239 * for a read-to-write set of values.
240 *
241 * ifn & ifd represent input information
242 *
243 * ofn, ofd, ofp, obuf & mmap_info are used for output file (optionally).
244 */
245struct io_info {
246 struct devpath *dpp;
247 FILE *ofp;
248 char *obuf;
249 struct cl_conn *nc; /* Server network connection */
250
251 /*
252 * mmap controlled output files
253 */
254 struct mmap_info mmap_info;
255
256 /*
257 * Client network fields
258 */
259 unsigned int ready;
260 unsigned long long data_queued;
261
262 /*
263 * Input/output file descriptors & names
264 */
265 int ifd, ofd;
266 char ifn[MAXPATHLEN + 64];
267 char ofn[MAXPATHLEN + 64];
268};
269
270static char blktrace_version[] = "2.0.0";
271
272/*
273 * Linkage to blktrace helper routines (trace conversions)
274 */
275int data_is_native = -1;
276
277static int ndevs;
278static int max_cpus;
279static int ncpus;
280static cpu_set_t *online_cpus;
281static int pagesize;
282static int act_mask = ~0U;
283static int kill_running_trace;
284static int stop_watch;
285static int piped_output;
286
287static char *debugfs_path = "/sys/kernel/debug";
288static char *output_name;
289static char *output_dir;
290
291static unsigned long buf_size = BUF_SIZE;
292static unsigned long buf_nr = BUF_NR;
293
294static FILE *pfp;
295
296static LIST_HEAD(devpaths);
297static LIST_HEAD(tracers);
298
299static volatile int done;
300
301/*
302 * tracer threads add entries, the main thread takes them off and processes
303 * them. These protect the dp_entries variable.
304 */
305static pthread_cond_t dp_cond = PTHREAD_COND_INITIALIZER;
306static pthread_mutex_t dp_mutex = PTHREAD_MUTEX_INITIALIZER;
307static volatile int dp_entries;
308
309/*
310 * These synchronize master / thread interactions.
311 */
312static pthread_cond_t mt_cond = PTHREAD_COND_INITIALIZER;
313static pthread_mutex_t mt_mutex = PTHREAD_MUTEX_INITIALIZER;
314static volatile int nthreads_running;
315static volatile int nthreads_leaving;
316static volatile int nthreads_error;
317static volatile int tracers_run;
318
319/*
320 * network cmd line params
321 */
322static struct sockaddr_in hostname_addr;
323static char hostname[MAXHOSTNAMELEN];
324static int net_port = TRACE_NET_PORT;
325static int net_use_sendfile = 1;
326static int net_mode;
327static int *cl_fds;
328
329static int (*handle_pfds)(struct tracer *, int, int);
330static int (*handle_list)(struct tracer_devpath_head *, struct list_head *);
331
332#define S_OPTS "d:a:A:r:o:kw:vVb:n:D:lh:p:sI:"
333static struct option l_opts[] = {
334 {
335 .name = "dev",
336 .has_arg = required_argument,
337 .flag = NULL,
338 .val = 'd'
339 },
340 {
341 .name = "input-devs",
342 .has_arg = required_argument,
343 .flag = NULL,
344 .val = 'I'
345 },
346 {
347 .name = "act-mask",
348 .has_arg = required_argument,
349 .flag = NULL,
350 .val = 'a'
351 },
352 {
353 .name = "set-mask",
354 .has_arg = required_argument,
355 .flag = NULL,
356 .val = 'A'
357 },
358 {
359 .name = "relay",
360 .has_arg = required_argument,
361 .flag = NULL,
362 .val = 'r'
363 },
364 {
365 .name = "output",
366 .has_arg = required_argument,
367 .flag = NULL,
368 .val = 'o'
369 },
370 {
371 .name = "kill",
372 .has_arg = no_argument,
373 .flag = NULL,
374 .val = 'k'
375 },
376 {
377 .name = "stopwatch",
378 .has_arg = required_argument,
379 .flag = NULL,
380 .val = 'w'
381 },
382 {
383 .name = "version",
384 .has_arg = no_argument,
385 .flag = NULL,
386 .val = 'v'
387 },
388 {
389 .name = "version",
390 .has_arg = no_argument,
391 .flag = NULL,
392 .val = 'V'
393 },
394 {
395 .name = "buffer-size",
396 .has_arg = required_argument,
397 .flag = NULL,
398 .val = 'b'
399 },
400 {
401 .name = "num-sub-buffers",
402 .has_arg = required_argument,
403 .flag = NULL,
404 .val = 'n'
405 },
406 {
407 .name = "output-dir",
408 .has_arg = required_argument,
409 .flag = NULL,
410 .val = 'D'
411 },
412 {
413 .name = "listen",
414 .has_arg = no_argument,
415 .flag = NULL,
416 .val = 'l'
417 },
418 {
419 .name = "host",
420 .has_arg = required_argument,
421 .flag = NULL,
422 .val = 'h'
423 },
424 {
425 .name = "port",
426 .has_arg = required_argument,
427 .flag = NULL,
428 .val = 'p'
429 },
430 {
431 .name = "no-sendfile",
432 .has_arg = no_argument,
433 .flag = NULL,
434 .val = 's'
435 },
436 {
437 .name = NULL,
438 }
439};
440
441static char usage_str[] = "\n\n" \
442 "-d <dev> | --dev=<dev>\n" \
443 "[ -r <debugfs path> | --relay=<debugfs path> ]\n" \
444 "[ -o <file> | --output=<file>]\n" \
445 "[ -D <dir> | --output-dir=<dir>\n" \
446 "[ -w <time> | --stopwatch=<time>]\n" \
447 "[ -a <action field> | --act-mask=<action field>]\n" \
448 "[ -A <action mask> | --set-mask=<action mask>]\n" \
449 "[ -b <size> | --buffer-size]\n" \
450 "[ -n <number> | --num-sub-buffers=<number>]\n" \
451 "[ -l | --listen]\n" \
452 "[ -h <hostname> | --host=<hostname>]\n" \
453 "[ -p <port number> | --port=<port number>]\n" \
454 "[ -s | --no-sendfile]\n" \
455 "[ -I <devs file> | --input-devs=<devs file>]\n" \
456 "[ -v <version> | --version]\n" \
457 "[ -V <version> | --version]\n" \
458
459 "\t-d Use specified device. May also be given last after options\n" \
460 "\t-r Path to mounted debugfs, defaults to /sys/kernel/debug\n" \
461 "\t-o File(s) to send output to\n" \
462 "\t-D Directory to prepend to output file names\n" \
463 "\t-w Stop after defined time, in seconds\n" \
464 "\t-a Only trace specified actions. See documentation\n" \
465 "\t-A Give trace mask as a single value. See documentation\n" \
466 "\t-b Sub buffer size in KiB (default 512)\n" \
467 "\t-n Number of sub buffers (default 4)\n" \
468 "\t-l Run in network listen mode (blktrace server)\n" \
469 "\t-h Run in network client mode, connecting to the given host\n" \
470 "\t-p Network port to use (default 8462)\n" \
471 "\t-s Make the network client NOT use sendfile() to transfer data\n" \
472 "\t-I Add devices found in <devs file>\n" \
473 "\t-v Print program version info\n" \
474 "\t-V Print program version info\n\n";
475
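/*
 * Illustrative invocation only (device name and values are hypothetical):
 *
 *   blktrace -d /dev/sda -D /tmp/bt -o sda -w 30 -b 1024 -n 8
 *
 * traces /dev/sda for 30 seconds into /tmp/bt/sda.blktrace.<cpu>, using
 * 8 sub-buffers of 1024 KiB each instead of the 4 x 512 KiB defaults.
 */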
476static void clear_events(struct pollfd *pfd)
477{
478 pfd->events = 0;
479 pfd->revents = 0;
480}
481
482static inline int net_client_use_sendfile(void)
483{
484 return net_mode == Net_client && net_use_sendfile;
485}
486
487static inline int net_client_use_send(void)
488{
489 return net_mode == Net_client && !net_use_sendfile;
490}
491
492static inline int use_tracer_devpaths(void)
493{
494 return piped_output || net_client_use_send();
495}
496
497static inline int in_addr_eq(struct in_addr a, struct in_addr b)
498{
499 return a.s_addr == b.s_addr;
500}
501
502static inline void pdc_dr_update(struct devpath *dpp, int cpu, int data_read)
503{
504 dpp->stats[cpu].data_read += data_read;
505}
506
507static inline void pdc_nev_update(struct devpath *dpp, int cpu, int nevents)
508{
509 dpp->stats[cpu].nevents += nevents;
510}
511
512static void show_usage(char *prog)
513{
514 fprintf(stderr, "Usage: %s %s", prog, usage_str);
515}
516
517/*
518 * Create a timespec 'msec' milliseconds into the future
519 */
520static inline void make_timespec(struct timespec *tsp, long delta_msec)
521{
522 struct timeval now;
523
524 gettimeofday(&now, NULL);
525 tsp->tv_sec = now.tv_sec;
526 tsp->tv_nsec = 1000L * now.tv_usec;
527
528 tsp->tv_nsec += (delta_msec * 1000000L);
529 if (tsp->tv_nsec > 1000000000L) {
530 long secs = tsp->tv_nsec / 1000000000L;
531
532 tsp->tv_sec += secs;
533 tsp->tv_nsec -= (secs * 1000000000L);
534 }
535}
536
537/*
538 * Add a timer to ensure wait ends
539 */
540static void t_pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex)
541{
542 struct timespec ts;
543
544 make_timespec(&ts, 50);
545 pthread_cond_timedwait(cond, mutex, &ts);
546}
547
548static void unblock_tracers(void)
549{
550 pthread_mutex_lock(&mt_mutex);
551 tracers_run = 1;
552 pthread_cond_broadcast(&mt_cond);
553 pthread_mutex_unlock(&mt_mutex);
554}
555
556static void tracer_wait_unblock(struct tracer *tp)
557{
558 pthread_mutex_lock(&mt_mutex);
559 while (!tp->is_done && !tracers_run)
560 pthread_cond_wait(&mt_cond, &mt_mutex);
561 pthread_mutex_unlock(&mt_mutex);
562}
563
564static void tracer_signal_ready(struct tracer *tp,
565 enum thread_status th_status,
566 int status)
567{
568 pthread_mutex_lock(&mt_mutex);
569 tp->status = status;
570
571 if (th_status == Th_running)
572 nthreads_running++;
573 else if (th_status == Th_error)
574 nthreads_error++;
575 else
576 nthreads_leaving++;
577
578 pthread_cond_signal(&mt_cond);
579 pthread_mutex_unlock(&mt_mutex);
580}
581
582static void wait_tracers_ready(int ncpus_started)
583{
584 pthread_mutex_lock(&mt_mutex);
585 while ((nthreads_running + nthreads_error) < ncpus_started)
586 t_pthread_cond_wait(&mt_cond, &mt_mutex);
587 pthread_mutex_unlock(&mt_mutex);
588}
589
590static void wait_tracers_leaving(void)
591{
592 pthread_mutex_lock(&mt_mutex);
593 while (nthreads_leaving < nthreads_running)
594 t_pthread_cond_wait(&mt_cond, &mt_mutex);
595 pthread_mutex_unlock(&mt_mutex);
596}
597
598static void init_mmap_info(struct mmap_info *mip)
599{
600 mip->buf_size = buf_size;
601 mip->buf_nr = buf_nr;
602 mip->pagesize = pagesize;
603}
604
605static void net_close_connection(int *fd)
606{
607 shutdown(*fd, SHUT_RDWR);
608 close(*fd);
609 *fd = -1;
610}
611
612static void dpp_free(struct devpath *dpp)
613{
614 if (dpp->stats)
615 free(dpp->stats);
616 if (dpp->ios)
617 free(dpp->ios);
618 if (dpp->path)
619 free(dpp->path);
620 if (dpp->buts_name)
621 free(dpp->buts_name);
622 free(dpp);
623}
624
625static int lock_on_cpu(int cpu)
626{
627 cpu_set_t * cpu_mask;
628 size_t size;
629
630 cpu_mask = CPU_ALLOC(max_cpus);
631 size = CPU_ALLOC_SIZE(max_cpus);
632
633 CPU_ZERO_S(size, cpu_mask);
634 CPU_SET_S(cpu, size, cpu_mask);
635 if (sched_setaffinity(0, size, cpu_mask) < 0) {
636 CPU_FREE(cpu_mask);
637 return errno;
638 }
639
640 CPU_FREE(cpu_mask);
641 return 0;
642}
643
644static int increase_limit(int resource, rlim_t increase)
645{
646 struct rlimit rlim;
647 int save_errno = errno;
648
649 if (!getrlimit(resource, &rlim)) {
650 rlim.rlim_cur += increase;
651 if (rlim.rlim_cur >= rlim.rlim_max)
652 rlim.rlim_max = rlim.rlim_cur + increase;
653
654 if (!setrlimit(resource, &rlim))
655 return 1;
656 }
657
658 errno = save_errno;
659 return 0;
660}
661
662static int handle_open_failure(void)
663{
664 if (errno == ENFILE || errno == EMFILE)
665 return increase_limit(RLIMIT_NOFILE, 16);
666 return 0;
667}
668
669static int handle_mem_failure(size_t length)
670{
671 if (errno == ENFILE)
672 return handle_open_failure();
673 else if (errno == ENOMEM)
674 return increase_limit(RLIMIT_MEMLOCK, 2 * length);
675 return 0;
676}
677
678static FILE *my_fopen(const char *path, const char *mode)
679{
680 FILE *fp;
681
682 do {
683 fp = fopen(path, mode);
684 } while (fp == NULL && handle_open_failure());
685
686 return fp;
687}
688
689static int my_open(const char *path, int flags)
690{
691 int fd;
692
693 do {
694 fd = open(path, flags);
695 } while (fd < 0 && handle_open_failure());
696
697 return fd;
698}
699
700static int my_socket(int domain, int type, int protocol)
701{
702 int fd;
703
704 do {
705 fd = socket(domain, type, protocol);
706 } while (fd < 0 && handle_open_failure());
707
708 return fd;
709}
710
711static int my_accept(int sockfd, struct sockaddr *addr, socklen_t *addrlen)
712{
713 int fd;
714
715 do {
716 fd = accept(sockfd, addr, addrlen);
717 } while (fd < 0 && handle_open_failure());
718
719 return fd;
720}
721
722static void *my_mmap(void *addr, size_t length, int prot, int flags, int fd,
723 off_t offset)
724{
725 void *new;
726
727 do {
728 new = mmap(addr, length, prot, flags, fd, offset);
729 } while (new == MAP_FAILED && handle_mem_failure(length));
730
731 return new;
732}
733
734static int my_mlock(struct tracer *tp,
735 const void *addr, size_t len)
736{
737 int ret, retry = 0;
738
739 do {
740 ret = mlock(addr, len);
741 if ((retry >= 10) && tp && tp->is_done)
742 break;
743 retry++;
744 } while (ret < 0 && handle_mem_failure(len));
745
746 return ret;
747}
748
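/*
 * Make sure the mmap()'ed output window can take at least 'maxlen' more
 * bytes: tear down any filled window, grow the file with ftruncate(),
 * map a fresh window at the current write position and mlock() it
 * (my_mlock()/handle_mem_failure() retry after raising RLIMIT_MEMLOCK).
 */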
749static int setup_mmap(int fd, unsigned int maxlen,
750 struct mmap_info *mip,
751 struct tracer *tp)
752{
753 if (mip->fs_off + maxlen > mip->fs_buf_len) {
754 unsigned long nr = max(16, mip->buf_nr);
755
756 if (mip->fs_buf) {
757 munlock(mip->fs_buf, mip->fs_buf_len);
758 munmap(mip->fs_buf, mip->fs_buf_len);
759 mip->fs_buf = NULL;
760 }
761
762 mip->fs_off = mip->fs_size & (mip->pagesize - 1);
763 mip->fs_buf_len = (nr * mip->buf_size) - mip->fs_off;
764 mip->fs_max_size += mip->fs_buf_len;
765
766 if (ftruncate(fd, mip->fs_max_size) < 0) {
767 perror("setup_mmap: ftruncate");
768 return 1;
769 }
770
771 mip->fs_buf = my_mmap(NULL, mip->fs_buf_len, PROT_WRITE,
772 MAP_SHARED, fd,
773 mip->fs_size - mip->fs_off);
774 if (mip->fs_buf == MAP_FAILED) {
775 perror("setup_mmap: mmap");
776 return 1;
777 }
778 if (my_mlock(tp, mip->fs_buf, mip->fs_buf_len) < 0) {
779 perror("setup_mlock: mlock");
780 return 1;
781 }
782 }
783
784 return 0;
785}
786
787static int __stop_trace(int fd)
788{
789 /*
790 * Should be stopped, don't complain if it isn't
791 */
792 ioctl(fd, BLKTRACESTOP);
793 return ioctl(fd, BLKTRACETEARDOWN);
794}
795
796static int write_data(char *buf, int len)
797{
798 int ret;
799
800rewrite:
801 ret = fwrite(buf, len, 1, pfp);
802 if (ferror(pfp) || ret != 1) {
803 if (errno == EINTR) {
804 clearerr(pfp);
805 goto rewrite;
806 }
807
808 if (!piped_output || (errno != EPIPE && errno != EBADF)) {
809 fprintf(stderr, "write(%d) failed: %d/%s\n",
810 len, errno, strerror(errno));
69dd57c2 811 }
3fe0b570 812 goto err;
69dd57c2
AB
813 }
814
3fe0b570 815 fflush(pfp);
69dd57c2 816 return 0;
3fe0b570
AB
817
818err:
819 clearerr(pfp);
820 return 1;
69dd57c2
AB
821}
822
823/*
3fe0b570 824 * Returns the number of bytes read (successfully)
69dd57c2 825 */
3fe0b570 826static int __net_recv_data(int fd, void *buf, unsigned int len)
69dd57c2 827{
3fe0b570
AB
828 unsigned int bytes_left = len;
829
830 while (bytes_left && !done) {
831 int ret = recv(fd, buf, bytes_left, MSG_WAITALL);
832
833 if (ret == 0)
834 break;
835 else if (ret < 0) {
5d65b5e6
AB
836 if (errno == EAGAIN) {
837 usleep(50);
838 continue;
839 }
840 perror("server: net_recv_data: recv failed");
055cc3e5 841 break;
3fe0b570
AB
842 } else {
843 buf += ret;
844 bytes_left -= ret;
845 }
846 }
69dd57c2 847
3fe0b570 848 return len - bytes_left;
69dd57c2
AB
849}
850
3fe0b570 851static int net_recv_data(int fd, void *buf, unsigned int len)
8e86c98a 852{
3fe0b570
AB
853 return __net_recv_data(fd, buf, len);
854}
7035d92d 855
3fe0b570
AB
856/*
857 * Returns number of bytes written
858 */
859static int net_send_data(int fd, void *buf, unsigned int buf_len)
860{
861 int ret;
862 unsigned int bytes_left = buf_len;
863
864 while (bytes_left) {
865 ret = send(fd, buf, bytes_left, 0);
866 if (ret < 0) {
867 perror("send");
868 break;
869 }
870
871 buf += ret;
872 bytes_left -= ret;
7035d92d
JA
873 }
874
3fe0b570 875 return buf_len - bytes_left;
8e86c98a
JA
876}
877
3fe0b570 878static int net_send_header(int fd, int cpu, char *buts_name, int len)
eb3c8108 879{
3fe0b570 880 struct blktrace_net_hdr hdr;
eb3c8108 881
3fe0b570 882 memset(&hdr, 0, sizeof(hdr));
eb3c8108 883
3fe0b570 884 hdr.magic = BLK_IO_TRACE_MAGIC;
d324757e 885 memset(hdr.buts_name, 0, sizeof(hdr.buts_name));
3fe0b570 886 strncpy(hdr.buts_name, buts_name, sizeof(hdr.buts_name));
d324757e 887 hdr.buts_name[sizeof(hdr.buts_name) - 1] = '\0';
3fe0b570 888 hdr.cpu = cpu;
d045a704 889 hdr.max_cpus = max_cpus;
3fe0b570
AB
890 hdr.len = len;
891 hdr.cl_id = getpid();
892 hdr.buf_size = buf_size;
893 hdr.buf_nr = buf_nr;
894 hdr.page_size = pagesize;
eb3c8108 895
3fe0b570
AB
896 return net_send_data(fd, &hdr, sizeof(hdr)) != sizeof(hdr);
897}
eb3c8108 898
3fe0b570
AB
899static void net_send_open_close(int fd, int cpu, char *buts_name, int len)
900{
901 struct blktrace_net_hdr ret_hdr;
eb3c8108 902
3fe0b570
AB
903 net_send_header(fd, cpu, buts_name, len);
904 net_recv_data(fd, &ret_hdr, sizeof(ret_hdr));
905}
eb3c8108 906
3fe0b570
AB
907static void net_send_open(int fd, int cpu, char *buts_name)
908{
909 net_send_open_close(fd, cpu, buts_name, 0);
eb3c8108
JA
910}
911
3fe0b570 912static void net_send_close(int fd, char *buts_name, int drops)
d0ca268b 913{
3fe0b570
AB
914 /*
915 * Overload CPU w/ number of drops
916 *
917 * XXX: Need to clear/set done around call - done=1 (which
918 * is true here) stops reads from happening... :-(
919 */
920 done = 0;
921 net_send_open_close(fd, drops, buts_name, 1);
922 done = 1;
923}
d0ca268b 924
3fe0b570
AB
925static void ack_open_close(int fd, char *buts_name)
926{
927 net_send_header(fd, 0, buts_name, 2);
928}
d0ca268b 929
3fe0b570
AB
930static void net_send_drops(int fd)
931{
932 struct list_head *p;
ed71a31e 933
3fe0b570
AB
934 __list_for_each(p, &devpaths) {
935 struct devpath *dpp = list_entry(p, struct devpath, head);
936
937 net_send_close(fd, dpp->buts_name, dpp->drops);
d0ca268b 938 }
3fe0b570 939}
d0ca268b 940
3fe0b570
AB
941/*
942 * Returns:
055cc3e5
AB
943 * 0: "EOF"
944 * 1: OK
945 * -1: Error
3fe0b570
AB
946 */
947static int net_get_header(struct cl_conn *nc, struct blktrace_net_hdr *bnh)
948{
949 int bytes_read;
950 int fl = fcntl(nc->fd, F_GETFL);
951
952 fcntl(nc->fd, F_SETFL, fl | O_NONBLOCK);
953 bytes_read = __net_recv_data(nc->fd, bnh, sizeof(*bnh));
954 fcntl(nc->fd, F_SETFL, fl & ~O_NONBLOCK);
955
956 if (bytes_read == sizeof(*bnh))
957 return 1;
958 else if (bytes_read == 0)
959 return 0;
055cc3e5
AB
960 else
961 return -1;
d0ca268b
JA
962}
963
e58f3937 964static int net_setup_addr(void)
d0ca268b 965{
e58f3937 966 struct sockaddr_in *addr = &hostname_addr;
cf9208ea 967
e58f3937
AB
968 memset(addr, 0, sizeof(*addr));
969 addr->sin_family = AF_INET;
970 addr->sin_port = htons(net_port);
3fe0b570 971
e58f3937
AB
972 if (inet_aton(hostname, &addr->sin_addr) != 1) {
973 struct hostent *hent;
974retry:
975 hent = gethostbyname(hostname);
3fe0b570 976 if (!hent) {
e58f3937
AB
977 if (h_errno == TRY_AGAIN) {
978 usleep(100);
979 goto retry;
980 } else if (h_errno == NO_RECOVERY) {
981 fprintf(stderr, "gethostbyname(%s)"
982 "non-recoverable error encountered\n",
983 hostname);
984 } else {
985 /*
986 * HOST_NOT_FOUND, NO_ADDRESS or NO_DATA
987 */
988 fprintf(stderr, "Host %s not found\n",
989 hostname);
990 }
3fe0b570
AB
991 return 1;
992 }
993
e58f3937 994 memcpy(&addr->sin_addr, hent->h_addr, 4);
d324757e
ES
995 memset(hostname, 0, sizeof(hostname));
996 strncpy(hostname, hent->h_name, sizeof(hostname));
997 hostname[sizeof(hostname) - 1] = '\0';
3fe0b570 998 }
7035d92d 999
e58f3937
AB
1000 return 0;
1001}
1002
1003static int net_setup_client(void)
1004{
1005 int fd;
1006 struct sockaddr_in *addr = &hostname_addr;
1007
3fe0b570
AB
1008 fd = my_socket(AF_INET, SOCK_STREAM, 0);
1009 if (fd < 0) {
1010 perror("client: socket");
1011 return -1;
1012 }
cf9208ea 1013
e58f3937 1014 if (connect(fd, (struct sockaddr *)addr, sizeof(*addr)) < 0) {
3fe0b570
AB
1015 if (errno == ECONNREFUSED)
1016 fprintf(stderr,
1017 "\nclient: Connection to %s refused, "
1018 "perhaps the server is not started?\n\n",
1019 hostname);
1020 else
1021 perror("client: connect");
055cc3e5 1022
3fe0b570
AB
1023 close(fd);
1024 return -1;
707b0914 1025 }
3fe0b570
AB
1026
1027 return fd;
d0ca268b
JA
1028}
1029
3fe0b570 1030static int open_client_connections(void)
e7c9f3ff 1031{
3fe0b570 1032 int cpu;
d045a704 1033 size_t alloc_size = CPU_ALLOC_SIZE(max_cpus);
e7c9f3ff 1034
3fe0b570 1035 cl_fds = calloc(ncpus, sizeof(*cl_fds));
d045a704
JK
1036 for (cpu = 0; cpu < max_cpus; cpu++) {
1037 if (!CPU_ISSET_S(cpu, alloc_size, online_cpus))
1038 continue;
3fe0b570
AB
1039 cl_fds[cpu] = net_setup_client();
1040 if (cl_fds[cpu] < 0)
1041 goto err;
eb3c8108 1042 }
3fe0b570
AB
1043 return 0;
1044
1045err:
1046 while (cpu > 0)
1047 close(cl_fds[cpu--]);
1048 free(cl_fds);
1049 return 1;
e7c9f3ff
NS
1050}
1051
3fe0b570 1052static void close_client_connections(void)
eb3c8108 1053{
3fe0b570
AB
1054 if (cl_fds) {
1055 int cpu, *fdp;
d045a704 1056 size_t alloc_size = CPU_ALLOC_SIZE(max_cpus);
eb3c8108 1057
d045a704
JK
1058 for (cpu = 0, fdp = cl_fds; cpu < max_cpus; cpu++, fdp++) {
1059 if (!CPU_ISSET_S(cpu, alloc_size, online_cpus))
1060 continue;
3fe0b570
AB
1061 if (*fdp >= 0) {
1062 net_send_drops(*fdp);
1063 net_close_connection(fdp);
1064 }
7934e668 1065 }
3fe0b570 1066 free(cl_fds);
ff11d54c 1067 }
eb3c8108
JA
1068}
1069
ab6809de 1070static int setup_buts(void)
d0ca268b 1071{
3fe0b570 1072 struct list_head *p;
ab6809de 1073 int ret = 0;
bbabf03a 1074
3fe0b570
AB
1075 __list_for_each(p, &devpaths) {
1076 struct blk_user_trace_setup buts;
1077 struct devpath *dpp = list_entry(p, struct devpath, head);
ae9f71b3 1078
3fe0b570
AB
1079 memset(&buts, 0, sizeof(buts));
1080 buts.buf_size = buf_size;
1081 buts.buf_nr = buf_nr;
1082 buts.act_mask = act_mask;
1083
055cc3e5 1084 if (ioctl(dpp->fd, BLKTRACESETUP, &buts) >= 0) {
d045a704 1085 dpp->ncpus = max_cpus;
3b552a2d 1086 dpp->buts_name = strdup(buts.name);
e63098f3 1087 dpp->setup_done = 1;
3b552a2d
AB
1088 if (dpp->stats)
1089 free(dpp->stats);
1090 dpp->stats = calloc(dpp->ncpus, sizeof(*dpp->stats));
1091 memset(dpp->stats, 0, dpp->ncpus * sizeof(*dpp->stats));
ab6809de 1092 } else {
055cc3e5
AB
1093 fprintf(stderr, "BLKTRACESETUP(2) %s failed: %d/%s\n",
1094 dpp->path, errno, strerror(errno));
ab6809de
JA
1095 ret++;
1096 }
3b552a2d 1097 }
ab6809de
JA
1098
1099 return ret;
3b552a2d
AB
1100}
1101
1102static void start_buts(void)
1103{
1104 struct list_head *p;
1105
1106 __list_for_each(p, &devpaths) {
1107 struct devpath *dpp = list_entry(p, struct devpath, head);
1108
1109 if (ioctl(dpp->fd, BLKTRACESTART) < 0) {
3fe0b570
AB
1110 fprintf(stderr, "BLKTRACESTART %s failed: %d/%s\n",
1111 dpp->path, errno, strerror(errno));
bbabf03a 1112 }
3fe0b570 1113 }
8a43bac5
JA
1114}
1115
3fe0b570 1116static int get_drops(struct devpath *dpp)
8e86c98a 1117{
3fe0b570
AB
1118 int fd, drops = 0;
1119 char fn[MAXPATHLEN + 64], tmp[256];
8e86c98a 1120
3fe0b570
AB
1121 snprintf(fn, sizeof(fn), "%s/block/%s/dropped", debugfs_path,
1122 dpp->buts_name);
8e86c98a 1123
3fe0b570
AB
1124 fd = my_open(fn, O_RDONLY);
1125 if (fd < 0) {
1126 /*
1127 * This may be ok: the kernel may not support
1128 * dropped counts.
1129 */
1130 if (errno != ENOENT)
1131 fprintf(stderr, "Could not open %s: %d/%s\n",
1132 fn, errno, strerror(errno));
1133 return 0;
1134 } else if (read(fd, tmp, sizeof(tmp)) < 0) {
1135 fprintf(stderr, "Could not read %s: %d/%s\n",
1136 fn, errno, strerror(errno));
1137 } else
1138 drops = atoi(tmp);
1139 close(fd);
8e86c98a 1140
3fe0b570 1141 return drops;
8e86c98a
JA
1142}
1143
3fe0b570 1144static void get_all_drops(void)
a3e4d330 1145{
3fe0b570 1146 struct list_head *p;
21f55651 1147
3fe0b570
AB
1148 __list_for_each(p, &devpaths) {
1149 struct devpath *dpp = list_entry(p, struct devpath, head);
055cc3e5 1150
3fe0b570 1151 dpp->drops = get_drops(dpp);
21f55651 1152 }
9db17354 1153}
eb3c8108 1154
3fe0b570 1155static inline struct trace_buf *alloc_trace_buf(int cpu, int bufsize)
9db17354 1156{
3fe0b570 1157 struct trace_buf *tbp;
21f55651 1158
3fe0b570
AB
1159 tbp = malloc(sizeof(*tbp) + bufsize);
1160 INIT_LIST_HEAD(&tbp->head);
1161 tbp->len = 0;
1162 tbp->buf = (void *)(tbp + 1);
1163 tbp->cpu = cpu;
1164 tbp->dpp = NULL; /* Will be set when tbp is added */
21f55651 1165
3fe0b570 1166 return tbp;
a3e4d330
JA
1167}
1168
3fe0b570 1169static void free_tracer_heads(struct devpath *dpp)
b7106311 1170{
3fe0b570
AB
1171 int cpu;
1172 struct tracer_devpath_head *hd;
b7106311 1173
d045a704 1174 for (cpu = 0, hd = dpp->heads; cpu < max_cpus; cpu++, hd++) {
3fe0b570
AB
1175 if (hd->prev)
1176 free(hd->prev);
055cc3e5 1177
3fe0b570
AB
1178 pthread_mutex_destroy(&hd->mutex);
1179 }
1180 free(dpp->heads);
1181}
b7106311 1182
3fe0b570
AB
1183static int setup_tracer_devpaths(void)
1184{
1185 struct list_head *p;
b7106311 1186
3fe0b570
AB
1187 if (net_client_use_send())
1188 if (open_client_connections())
1189 return 1;
b7106311 1190
3fe0b570
AB
1191 __list_for_each(p, &devpaths) {
1192 int cpu;
1193 struct tracer_devpath_head *hd;
1194 struct devpath *dpp = list_entry(p, struct devpath, head);
b7106311 1195
d045a704
JK
1196 dpp->heads = calloc(max_cpus, sizeof(struct tracer_devpath_head));
1197 for (cpu = 0, hd = dpp->heads; cpu < max_cpus; cpu++, hd++) {
3fe0b570
AB
1198 INIT_LIST_HEAD(&hd->head);
1199 pthread_mutex_init(&hd->mutex, NULL);
1200 hd->prev = NULL;
1201 }
b7106311
JA
1202 }
1203
3fe0b570 1204 return 0;
b7106311
JA
1205}
1206
3fe0b570
AB
1207static inline void add_trace_buf(struct devpath *dpp, int cpu,
1208 struct trace_buf **tbpp)
18eed2a7 1209{
3fe0b570
AB
1210 struct trace_buf *tbp = *tbpp;
1211 struct tracer_devpath_head *hd = &dpp->heads[cpu];
18eed2a7 1212
3fe0b570 1213 tbp->dpp = dpp;
2f064793 1214
3fe0b570
AB
1215 pthread_mutex_lock(&hd->mutex);
1216 list_add_tail(&tbp->head, &hd->head);
1217 pthread_mutex_unlock(&hd->mutex);
18eed2a7 1218
3fe0b570 1219 *tbpp = alloc_trace_buf(cpu, buf_size);
18eed2a7
JA
1220}
1221
3fe0b570 1222static inline void incr_entries(int entries_handled)
a3e4d330 1223{
3fe0b570
AB
1224 pthread_mutex_lock(&dp_mutex);
1225 if (dp_entries == 0)
1226 pthread_cond_signal(&dp_cond);
1227 dp_entries += entries_handled;
1228 pthread_mutex_unlock(&dp_mutex);
a3e4d330
JA
1229}
1230
055cc3e5
AB
1231static void decr_entries(int handled)
1232{
1233 pthread_mutex_lock(&dp_mutex);
1234 dp_entries -= handled;
1235 pthread_mutex_unlock(&dp_mutex);
1236}
1237
1238static int wait_empty_entries(void)
1239{
1240 pthread_mutex_lock(&dp_mutex);
1241 while (!done && dp_entries == 0)
1242 t_pthread_cond_wait(&dp_cond, &dp_mutex);
1243 pthread_mutex_unlock(&dp_mutex);
1244
1245 return !done;
1246}
1247
3fe0b570 1248static int add_devpath(char *path)
8e86c98a 1249{
3fe0b570
AB
1250 int fd;
1251 struct devpath *dpp;
4b747a40 1252 struct list_head *p;
3fe0b570 1253
4b747a40
ES
1254 /*
1255 * Verify device is not duplicated
1256 */
1257 __list_for_each(p, &devpaths) {
1258 struct devpath *tmp = list_entry(p, struct devpath, head);
1259 if (!strcmp(tmp->path, path))
1260 return 0;
1261 }
8e86c98a 1262 /*
3fe0b570 1263 * Verify device is valid before going too far
8e86c98a 1264 */
3fe0b570
AB
1265 fd = my_open(path, O_RDONLY | O_NONBLOCK);
1266 if (fd < 0) {
1267 fprintf(stderr, "Invalid path %s specified: %d/%s\n",
1268 path, errno, strerror(errno));
1269 return 1;
1270 }
8e86c98a 1271
3fe0b570
AB
1272 dpp = malloc(sizeof(*dpp));
1273 memset(dpp, 0, sizeof(*dpp));
1274 dpp->path = strdup(path);
1275 dpp->fd = fd;
70598a36 1276 ndevs++;
3fe0b570 1277 list_add_tail(&dpp->head, &devpaths);
8e86c98a 1278
3fe0b570 1279 return 0;
8e86c98a
JA
1280}
1281
3fe0b570 1282static void rel_devpaths(void)
a3e4d330 1283{
3fe0b570 1284 struct list_head *p, *q;
a3e4d330 1285
3fe0b570
AB
1286 list_for_each_safe(p, q, &devpaths) {
1287 struct devpath *dpp = list_entry(p, struct devpath, head);
a3e4d330 1288
3fe0b570 1289 list_del(&dpp->head);
e63098f3 1290 if (dpp->setup_done)
1291 __stop_trace(dpp->fd);
3fe0b570 1292 close(dpp->fd);
a3e4d330 1293
3fe0b570
AB
1294 if (dpp->heads)
1295 free_tracer_heads(dpp);
a3e4d330 1296
3fe0b570
AB
1297 dpp_free(dpp);
1298 ndevs--;
b7106311 1299 }
8e86c98a 1300}
b7106311 1301
3fe0b570 1302static int flush_subbuf_net(struct trace_buf *tbp)
8e86c98a 1303{
3fe0b570
AB
1304 int fd = cl_fds[tbp->cpu];
1305 struct devpath *dpp = tbp->dpp;
b7106311 1306
3fe0b570
AB
1307 if (net_send_header(fd, tbp->cpu, dpp->buts_name, tbp->len))
1308 return 1;
055cc3e5 1309 else if (net_send_data(fd, tbp->buf, tbp->len) != tbp->len)
3fe0b570 1310 return 1;
a3e4d330 1311
8e86c98a 1312 return 0;
a3e4d330
JA
1313}
1314
3fe0b570
AB
1315static int
1316handle_list_net(__attribute__((__unused__))struct tracer_devpath_head *hd,
1317 struct list_head *list)
8e86c98a 1318{
3fe0b570
AB
1319 struct trace_buf *tbp;
1320 struct list_head *p, *q;
1321 int entries_handled = 0;
8e86c98a 1322
3fe0b570
AB
1323 list_for_each_safe(p, q, list) {
1324 tbp = list_entry(p, struct trace_buf, head);
8e86c98a 1325
3fe0b570
AB
1326 list_del(&tbp->head);
1327 entries_handled++;
6a752c90 1328
3fe0b570
AB
1329 if (cl_fds[tbp->cpu] >= 0) {
1330 if (flush_subbuf_net(tbp)) {
1331 close(cl_fds[tbp->cpu]);
1332 cl_fds[tbp->cpu] = -1;
1333 }
1334 }
7ab2f837 1335
3fe0b570 1336 free(tbp);
7934e668
JA
1337 }
1338
3fe0b570 1339 return entries_handled;
6a752c90
JA
1340}
1341
055cc3e5
AB
1342/*
1343 * Tack 'tbp's buf onto the tail of 'prev's buf
1344 */
1345static struct trace_buf *tb_combine(struct trace_buf *prev,
1346 struct trace_buf *tbp)
1347{
1348 unsigned long tot_len;
1349
1350 tot_len = prev->len + tbp->len;
1351 if (tot_len > buf_size) {
1352 /*
1353 * tbp->head isn't connected (it was 'prev'
1354 * so it had been taken off of the list
1355 * before). Therefore, we can realloc
1356 * the whole structures, as the other fields
1357 * are "static".
1358 */
d8365957 1359 prev = realloc(prev, sizeof(*prev) + tot_len);
055cc3e5
AB
1360 prev->buf = (void *)(prev + 1);
1361 }
1362
1363 memcpy(prev->buf + prev->len, tbp->buf, tbp->len);
1364 prev->len = tot_len;
1365
1366 free(tbp);
1367 return prev;
1368}
1369
3fe0b570
AB
1370static int handle_list_file(struct tracer_devpath_head *hd,
1371 struct list_head *list)
f6fead25 1372{
3fe0b570
AB
1373 int off, t_len, nevents;
1374 struct blk_io_trace *t;
1375 struct list_head *p, *q;
1376 int entries_handled = 0;
1377 struct trace_buf *tbp, *prev;
11629347 1378
3fe0b570
AB
1379 prev = hd->prev;
1380 list_for_each_safe(p, q, list) {
1381 tbp = list_entry(p, struct trace_buf, head);
1382 list_del(&tbp->head);
1383 entries_handled++;
18eed2a7 1384
3fe0b570
AB
1385 /*
1386 * If there was some leftover before, tack this new
1387 * entry onto the tail of the previous one.
1388 */
055cc3e5
AB
1389 if (prev)
1390 tbp = tb_combine(prev, tbp);
ff11d54c 1391
3fe0b570
AB
1392 /*
1393 * See how many whole traces there are - send them
1394 * all out in one go.
1395 */
1396 off = 0;
1397 nevents = 0;
1398 while (off + (int)sizeof(*t) <= tbp->len) {
1399 t = (struct blk_io_trace *)(tbp->buf + off);
1400 t_len = sizeof(*t) + t->pdu_len;
1401 if (off + t_len > tbp->len)
1402 break;
ff11d54c 1403
3fe0b570
AB
1404 off += t_len;
1405 nevents++;
1406 }
1407 if (nevents)
1408 pdc_nev_update(tbp->dpp, tbp->cpu, nevents);
4aeec019 1409
3fe0b570
AB
1410 /*
1411 * Write any full set of traces, any remaining data is kept
1412 * for the next pass.
1413 */
1414 if (off) {
055cc3e5 1415 if (write_data(tbp->buf, off) || off == tbp->len) {
3fe0b570 1416 free(tbp);
055cc3e5
AB
1417 prev = NULL;
1418 }
3fe0b570
AB
1419 else {
1420 /*
1421 * Move valid data to beginning of buffer
1422 */
1423 tbp->len -= off;
1424 memmove(tbp->buf, tbp->buf + off, tbp->len);
1425 prev = tbp;
1426 }
1427 } else
1428 prev = tbp;
ff11d54c 1429 }
3fe0b570 1430 hd->prev = prev;
ff11d54c 1431
3fe0b570 1432 return entries_handled;
ff11d54c
TZ
1433}
1434
3fe0b570 1435static void __process_trace_bufs(void)
8a43bac5 1436{
3fe0b570
AB
1437 int cpu;
1438 struct list_head *p;
1439 struct list_head list;
1440 int handled = 0;
1441
1442 __list_for_each(p, &devpaths) {
1443 struct devpath *dpp = list_entry(p, struct devpath, head);
1444 struct tracer_devpath_head *hd = dpp->heads;
1445
d045a704 1446 for (cpu = 0; cpu < max_cpus; cpu++, hd++) {
3fe0b570
AB
1447 pthread_mutex_lock(&hd->mutex);
1448 if (list_empty(&hd->head)) {
1449 pthread_mutex_unlock(&hd->mutex);
1450 continue;
1451 }
8a43bac5 1452
3fe0b570
AB
1453 list_replace_init(&hd->head, &list);
1454 pthread_mutex_unlock(&hd->mutex);
6480258a 1455
3fe0b570
AB
1456 handled += handle_list(hd, &list);
1457 }
d0ca268b
JA
1458 }
1459
055cc3e5
AB
1460 if (handled)
1461 decr_entries(handled);
8a43bac5
JA
1462}
1463
3fe0b570 1464static void process_trace_bufs(void)
8a43bac5 1465{
055cc3e5 1466 while (wait_empty_entries())
3fe0b570 1467 __process_trace_bufs();
3fe0b570 1468}
3a9d6c13 1469
3fe0b570
AB
1470static void clean_trace_bufs(void)
1471{
3a9d6c13 1472 /*
3fe0b570
AB
1473 * No mutex needed here: we're only reading from the lists,
1474 * tracers are done
3a9d6c13 1475 */
3fe0b570
AB
1476 while (dp_entries)
1477 __process_trace_bufs();
1478}
4b5db44a 1479
3fe0b570
AB
1480static inline void read_err(int cpu, char *ifn)
1481{
1482 if (errno != EAGAIN)
1483 fprintf(stderr, "Thread %d failed read of %s: %d/%s\n",
1484 cpu, ifn, errno, strerror(errno));
4b5db44a
JA
1485}
1486
3fe0b570 1487static int net_sendfile(struct io_info *iop)
d5396421 1488{
3fe0b570 1489 int ret;
d5396421 1490
3fe0b570
AB
1491 ret = sendfile(iop->ofd, iop->ifd, NULL, iop->ready);
1492 if (ret < 0) {
1493 perror("sendfile");
1494 return 1;
1495 } else if (ret < (int)iop->ready) {
1496 fprintf(stderr, "short sendfile send (%d of %d)\n",
1497 ret, iop->ready);
1498 return 1;
1499 }
91816d54 1500
9db17354 1501 return 0;
91816d54
JA
1502}
1503
3fe0b570 1504static inline int net_sendfile_data(struct tracer *tp, struct io_info *iop)
d0ca268b 1505{
3fe0b570 1506 struct devpath *dpp = iop->dpp;
d0ca268b 1507
3fe0b570
AB
1508 if (net_send_header(iop->ofd, tp->cpu, dpp->buts_name, iop->ready))
1509 return 1;
1510 return net_sendfile(iop);
1511}
1512
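/*
 * Build the per-CPU output file name:
 * <output_dir or .>/<subdir><output_name or buts_name>.blktrace.<cpu>,
 * creating the destination directory when it does not exist yet.
 */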
1513static int fill_ofname(char *dst, int dstlen, char *subdir, char *buts_name,
1514 int cpu)
8e86c98a 1515{
3fe0b570 1516 int len;
e3bf54d8 1517 struct stat sb;
8e86c98a
JA
1518
1519 if (output_dir)
d7a1f726 1520 len = snprintf(dst, dstlen, "%s/", output_dir);
dd870ef6 1521 else
d7a1f726 1522 len = snprintf(dst, dstlen, "./");
8e86c98a 1523
d7a1f726
JK
1524 if (subdir)
1525 len += snprintf(dst + len, dstlen - len, "%s", subdir);
e0a1988b 1526
d7a1f726 1527 if (stat(dst, &sb) < 0) {
e3bf54d8 1528 if (errno != ENOENT) {
3fe0b570
AB
1529 fprintf(stderr,
1530 "Destination dir %s stat failed: %d/%s\n",
d7a1f726 1531 dst, errno, strerror(errno));
e3bf54d8
JA
1532 return 1;
1533 }
60886290
JM
1534 /*
1535 * There is no synchronization between multiple threads
1536 * trying to create the directory at once. It's harmless
1537 * to let them try, so just detect the problem and move on.
1538 */
d7a1f726 1539 if (mkdir(dst, 0755) < 0 && errno != EEXIST) {
3fe0b570
AB
1540 fprintf(stderr,
1541 "Destination dir %s can't be made: %d/%s\n",
d7a1f726 1542 dst, errno, strerror(errno));
e3bf54d8
JA
1543 return 1;
1544 }
1545 }
1546
8e86c98a 1547 if (output_name)
d7a1f726 1548 snprintf(dst + len, dstlen - len, "%s.blktrace.%d",
3fe0b570 1549 output_name, cpu);
8e86c98a 1550 else
d7a1f726
JK
1551 snprintf(dst + len, dstlen - len, "%s.blktrace.%d",
1552 buts_name, cpu);
e3bf54d8
JA
1553
1554 return 0;
8e86c98a
JA
1555}
1556
3fe0b570 1557static int set_vbuf(struct io_info *iop, int mode, size_t size)
0cc7d25e 1558{
3fe0b570
AB
1559 iop->obuf = malloc(size);
1560 if (setvbuf(iop->ofp, iop->obuf, mode, size) < 0) {
1561 fprintf(stderr, "setvbuf(%s, %d) failed: %d/%s\n",
1562 iop->dpp->path, (int)size, errno,
1563 strerror(errno));
1564 free(iop->obuf);
ddf22842
JA
1565 return 1;
1566 }
d5396421 1567
ddf22842
JA
1568 return 0;
1569}
007c233c 1570
3fe0b570 1571static int iop_open(struct io_info *iop, int cpu)
ddf22842 1572{
d7a1f726
JK
1573 char hostdir[MAXPATHLEN + 64];
1574
3fe0b570 1575 iop->ofd = -1;
d7a1f726
JK
1576 if (net_mode == Net_server) {
1577 struct cl_conn *nc = iop->nc;
1578 int len;
1579
1580 len = snprintf(hostdir, sizeof(hostdir), "%s-",
1581 nc->ch->hostname);
1582 len += strftime(hostdir + len, sizeof(hostdir) - len, "%F-%T/",
1583 gmtime(&iop->dpp->cl_connect_time));
1584 } else {
1585 hostdir[0] = 0;
1586 }
1587
1588 if (fill_ofname(iop->ofn, sizeof(iop->ofn), hostdir,
1589 iop->dpp->buts_name, cpu))
3fe0b570 1590 return 1;
0cc7d25e 1591
3fe0b570
AB
1592 iop->ofp = my_fopen(iop->ofn, "w+");
1593 if (iop->ofp == NULL) {
1594 fprintf(stderr, "Open output file %s failed: %d/%s\n",
1595 iop->ofn, errno, strerror(errno));
1596 return 1;
1597 }
055cc3e5 1598
3fe0b570
AB
1599 if (set_vbuf(iop, _IOLBF, FILE_VBUF_SIZE)) {
1600 fprintf(stderr, "set_vbuf for file %s failed: %d/%s\n",
1601 iop->ofn, errno, strerror(errno));
1602 fclose(iop->ofp);
1603 return 1;
d0ca268b
JA
1604 }
1605
3fe0b570 1606 iop->ofd = fileno(iop->ofp);
e7c9f3ff 1607 return 0;
d0ca268b
JA
1608}
1609
df81fdb5
AB
1610static void close_iop(struct io_info *iop)
1611{
1612 struct mmap_info *mip = &iop->mmap_info;
1613
1614 if (mip->fs_buf)
1615 munmap(mip->fs_buf, mip->fs_buf_len);
1616
1617 if (!piped_output) {
1618 if (ftruncate(fileno(iop->ofp), mip->fs_size) < 0) {
1619 fprintf(stderr,
1620 "Ignoring err: ftruncate(%s): %d/%s\n",
1621 iop->ofn, errno, strerror(errno));
1622 }
1623 }
1624
1625 if (iop->ofp)
1626 fclose(iop->ofp);
1627 if (iop->obuf)
1628 free(iop->obuf);
1629}
1630
1631static void close_ios(struct tracer *tp)
1632{
1633 while (tp->nios > 0) {
1634 struct io_info *iop = &tp->ios[--tp->nios];
1635
1636 iop->dpp->drops = get_drops(iop->dpp);
1637 if (iop->ifd >= 0)
1638 close(iop->ifd);
1639
1640 if (iop->ofp)
1641 close_iop(iop);
1642 else if (iop->ofd >= 0) {
1643 struct devpath *dpp = iop->dpp;
1644
1645 net_send_close(iop->ofd, dpp->buts_name, dpp->drops);
1646 net_close_connection(&iop->ofd);
1647 }
1648 }
1649
1650 free(tp->ios);
1651 free(tp->pfds);
1652}
1653
3fe0b570 1654static int open_ios(struct tracer *tp)
3aabcd89 1655{
3fe0b570
AB
1656 struct pollfd *pfd;
1657 struct io_info *iop;
1658 struct list_head *p;
1659
1660 tp->ios = calloc(ndevs, sizeof(struct io_info));
3fe0b570 1661 memset(tp->ios, 0, ndevs * sizeof(struct io_info));
055cc3e5
AB
1662
1663 tp->pfds = calloc(ndevs, sizeof(struct pollfd));
3fe0b570
AB
1664 memset(tp->pfds, 0, ndevs * sizeof(struct pollfd));
1665
1666 tp->nios = 0;
1667 iop = tp->ios;
1668 pfd = tp->pfds;
1669 __list_for_each(p, &devpaths) {
1670 struct devpath *dpp = list_entry(p, struct devpath, head);
1671
1672 iop->dpp = dpp;
1673 iop->ofd = -1;
1674 snprintf(iop->ifn, sizeof(iop->ifn), "%s/block/%s/trace%d",
1675 debugfs_path, dpp->buts_name, tp->cpu);
1676
1677 iop->ifd = my_open(iop->ifn, O_RDONLY | O_NONBLOCK);
1678 if (iop->ifd < 0) {
1679 fprintf(stderr, "Thread %d failed open %s: %d/%s\n",
1680 tp->cpu, iop->ifn, errno, strerror(errno));
1681 return 1;
1682 }
1683
1684 init_mmap_info(&iop->mmap_info);
1685
1686 pfd->fd = iop->ifd;
1687 pfd->events = POLLIN;
1688
1689 if (piped_output)
1690 ;
1691 else if (net_client_use_sendfile()) {
1692 iop->ofd = net_setup_client();
1693 if (iop->ofd < 0)
1694 goto err;
1695 net_send_open(iop->ofd, tp->cpu, dpp->buts_name);
1696 } else if (net_mode == Net_none) {
1697 if (iop_open(iop, tp->cpu))
1698 goto err;
1699 } else {
1700 /*
1701 * This ensures that the server knows about all
1702 * connections & devices before _any_ closes
1703 */
1704 net_send_open(cl_fds[tp->cpu], tp->cpu, dpp->buts_name);
1705 }
007c233c 1706
3fe0b570
AB
1707 pfd++;
1708 iop++;
1709 tp->nios++;
9db17354 1710 }
3aabcd89 1711
3fe0b570 1712 return 0;
72ca8801 1713
3fe0b570
AB
1714err:
1715 close(iop->ifd); /* tp->nios _not_ bumped */
df81fdb5 1716 close_ios(tp);
3fe0b570 1717 return 1;
e7c9f3ff
NS
1718}
1719
3fe0b570 1720static int handle_pfds_file(struct tracer *tp, int nevs, int force_read)
e7c9f3ff 1721{
3fe0b570
AB
1722 struct mmap_info *mip;
1723 int i, ret, nentries = 0;
1724 struct pollfd *pfd = tp->pfds;
1725 struct io_info *iop = tp->ios;
1726
1727 for (i = 0; nevs > 0 && i < ndevs; i++, pfd++, iop++) {
1728 if (pfd->revents & POLLIN || force_read) {
1729 mip = &iop->mmap_info;
1730
ae7c049d 1731 ret = setup_mmap(iop->ofd, buf_size, mip, tp);
3fe0b570
AB
1732 if (ret < 0) {
1733 pfd->events = 0;
1734 break;
1735 }
428683db 1736
3fe0b570
AB
1737 ret = read(iop->ifd, mip->fs_buf + mip->fs_off,
1738 buf_size);
1739 if (ret > 0) {
1740 pdc_dr_update(iop->dpp, tp->cpu, ret);
1741 mip->fs_size += ret;
1742 mip->fs_off += ret;
1743 nentries++;
1744 } else if (ret == 0) {
1745 /*
1746 * Short reads after we're done stop us
1747 * from trying reads.
1748 */
1749 if (tp->is_done)
1750 clear_events(pfd);
1751 } else {
1752 read_err(tp->cpu, iop->ifn);
1753 if (errno != EAGAIN || tp->is_done)
1754 clear_events(pfd);
1755 }
1756 nevs--;
e7c9f3ff 1757 }
e7c9f3ff 1758 }
56070ea4 1759
3fe0b570 1760 return nentries;
e7c9f3ff 1761}
52724a0e 1762
055cc3e5
AB
1763static int handle_pfds_netclient(struct tracer *tp, int nevs, int force_read)
1764{
1765 struct stat sb;
1766 int i, nentries = 0;
055cc3e5
AB
1767 struct pollfd *pfd = tp->pfds;
1768 struct io_info *iop = tp->ios;
1769
62d712a7 1770 for (i = 0; i < ndevs; i++, pfd++, iop++) {
055cc3e5
AB
1771 if (pfd->revents & POLLIN || force_read) {
1772 if (fstat(iop->ifd, &sb) < 0) {
1773 perror(iop->ifn);
1774 pfd->events = 0;
1775 } else if (sb.st_size > (off_t)iop->data_queued) {
1776 iop->ready = sb.st_size - iop->data_queued;
1777 iop->data_queued = sb.st_size;
1778
1779 if (!net_sendfile_data(tp, iop)) {
1780 pdc_dr_update(iop->dpp, tp->cpu,
1781 iop->ready);
1782 nentries++;
1783 } else
1784 clear_events(pfd);
1785 }
1786 if (--nevs == 0)
1787 break;
1788 }
1789 }
1790
1791 if (nentries)
1792 incr_entries(nentries);
1793
1794 return nentries;
1795}
1796
1797static int handle_pfds_entries(struct tracer *tp, int nevs, int force_read)
1798{
1799 int i, nentries = 0;
1800 struct trace_buf *tbp;
1801 struct pollfd *pfd = tp->pfds;
1802 struct io_info *iop = tp->ios;
1803
1804 tbp = alloc_trace_buf(tp->cpu, buf_size);
1805 for (i = 0; i < ndevs; i++, pfd++, iop++) {
1806 if (pfd->revents & POLLIN || force_read) {
1807 tbp->len = read(iop->ifd, tbp->buf, buf_size);
1808 if (tbp->len > 0) {
1809 pdc_dr_update(iop->dpp, tp->cpu, tbp->len);
1810 add_trace_buf(iop->dpp, tp->cpu, &tbp);
1811 nentries++;
1812 } else if (tbp->len == 0) {
1813 /*
1814 * Short reads after we're done stop us
1815 * from trying reads.
1816 */
1817 if (tp->is_done)
1818 clear_events(pfd);
1819 } else {
1820 read_err(tp->cpu, iop->ifn);
1821 if (errno != EAGAIN || tp->is_done)
1822 clear_events(pfd);
1823 }
1824 if (!piped_output && --nevs == 0)
1825 break;
1826 }
1827 }
1828 free(tbp);
1829
1830 if (nentries)
1831 incr_entries(nentries);
1832
1833 return nentries;
1834}
1835
3fe0b570 1836static void *thread_main(void *arg)
8e86c98a 1837{
df81fdb5 1838 int ret, ndone, to_val;
3fe0b570 1839 struct tracer *tp = arg;
8e86c98a 1840
3fe0b570
AB
1841 ret = lock_on_cpu(tp->cpu);
1842 if (ret)
1843 goto err;
ff11d54c 1844
3fe0b570 1845 ret = open_ios(tp);
df81fdb5 1846 if (ret)
3fe0b570 1847 goto err;
6a6d3f0f 1848
3fe0b570
AB
1849 if (piped_output)
1850 to_val = 50; /* Frequent partial handles */
ff11d54c 1851 else
3fe0b570
AB
1852 to_val = 500; /* 1/2 second intervals */
1853
df81fdb5
AB
1854
1855 tracer_signal_ready(tp, Th_running, 0);
1856 tracer_wait_unblock(tp);
6488ca48 1857
3fe0b570
AB
1858 while (!tp->is_done) {
1859 ndone = poll(tp->pfds, ndevs, to_val);
1860 if (ndone || piped_output)
1861 (void)handle_pfds(tp, ndone, piped_output);
1862 else if (ndone < 0 && errno != EINTR)
1863 fprintf(stderr, "Thread %d poll failed: %d/%s\n",
1864 tp->cpu, errno, strerror(errno));
1865 }
22cd0c02
JA
1866
1867 /*
3fe0b570 1868 * Trace is stopped, pull data until we get a short read
22cd0c02 1869 */
3fe0b570
AB
1870 while (handle_pfds(tp, ndevs, 1) > 0)
1871 ;
055cc3e5 1872
3fe0b570 1873 close_ios(tp);
df81fdb5
AB
1874 tracer_signal_ready(tp, Th_leaving, 0);
1875 return NULL;
8e86c98a 1876
3fe0b570 1877err:
df81fdb5 1878 tracer_signal_ready(tp, Th_error, ret);
3fe0b570 1879 return NULL;
22cd0c02
JA
1880}
1881
3fe0b570 1882static int start_tracer(int cpu)
22cd0c02 1883{
3fe0b570 1884 struct tracer *tp;
22cd0c02 1885
3fe0b570
AB
1886 tp = malloc(sizeof(*tp));
1887 memset(tp, 0, sizeof(*tp));
7ab2f837 1888
3fe0b570 1889 INIT_LIST_HEAD(&tp->head);
3fe0b570
AB
1890 tp->status = 0;
1891 tp->cpu = cpu;
8e86c98a 1892
3fe0b570
AB
1893 if (pthread_create(&tp->thread, NULL, thread_main, tp)) {
1894 fprintf(stderr, "FAILED to start thread on CPU %d: %d/%s\n",
1895 cpu, errno, strerror(errno));
df81fdb5
AB
1896 free(tp);
1897 return 1;
8e86c98a 1898 }
3fe0b570 1899
df81fdb5
AB
1900 list_add_tail(&tp->head, &tracers);
1901 return 0;
8e86c98a
JA
1902}
1903
8772bc4f
JK
1904static int create_output_files(int cpu)
1905{
1906 char fname[MAXPATHLEN + 64];
1907 struct list_head *p;
1908 FILE *f;
1909
1910 __list_for_each(p, &devpaths) {
1911 struct devpath *dpp = list_entry(p, struct devpath, head);
1912
1913 if (fill_ofname(fname, sizeof(fname), NULL, dpp->buts_name,
1914 cpu))
1915 return 1;
1916 f = my_fopen(fname, "w+");
1917 if (!f)
1918 return 1;
1919 fclose(f);
1920 }
1921 return 0;
1922}
1923
df81fdb5 1924static void start_tracers(void)
e0a1988b 1925{
d045a704 1926 int cpu, started = 0;
df81fdb5 1927 struct list_head *p;
d045a704 1928 size_t alloc_size = CPU_ALLOC_SIZE(max_cpus);
3fe0b570 1929
d045a704 1930 for (cpu = 0; cpu < max_cpus; cpu++) {
8772bc4f
JK
1931 if (!CPU_ISSET_S(cpu, alloc_size, online_cpus)) {
1932 /*
1933 * Create fake empty output files so that other tools
1934 * like blkparse don't have to bother with sparse CPU
1935 * number space.
1936 */
1937 if (create_output_files(cpu))
1938 break;
d045a704 1939 continue;
8772bc4f 1940 }
3fe0b570
AB
1941 if (start_tracer(cpu))
1942 break;
d045a704
JK
1943 started++;
1944 }
e0a1988b 1945
d045a704 1946 wait_tracers_ready(started);
df81fdb5
AB
1947
1948 __list_for_each(p, &tracers) {
1949 struct tracer *tp = list_entry(p, struct tracer, head);
1950 if (tp->status)
1951 fprintf(stderr,
1952 "FAILED to start thread on CPU %d: %d/%s\n",
1953 tp->cpu, tp->status, strerror(tp->status));
1954 }
3fe0b570 1955}
e0a1988b 1956
3fe0b570
AB
1957static void stop_tracers(void)
1958{
1959 struct list_head *p;
e0a1988b
JA
1960
1961 /*
3fe0b570 1962 * Stop the tracing - makes the tracer threads clean up quicker.
e0a1988b 1963 */
3fe0b570
AB
1964 __list_for_each(p, &devpaths) {
1965 struct devpath *dpp = list_entry(p, struct devpath, head);
1966 (void)ioctl(dpp->fd, BLKTRACESTOP);
e0a1988b
JA
1967 }
1968
3fe0b570
AB
1969 /*
1970 * Tell each tracer to quit
1971 */
1972 __list_for_each(p, &tracers) {
1973 struct tracer *tp = list_entry(p, struct tracer, head);
1974 tp->is_done = 1;
1975 }
838361c6 1976 pthread_cond_broadcast(&mt_cond);
ff11d54c 1977}
e0a1988b 1978
3fe0b570 1979static void del_tracers(void)
ff11d54c 1980{
3fe0b570 1981 struct list_head *p, *q;
ff11d54c 1982
3fe0b570
AB
1983 list_for_each_safe(p, q, &tracers) {
1984 struct tracer *tp = list_entry(p, struct tracer, head);
ff11d54c 1985
3fe0b570
AB
1986 list_del(&tp->head);
1987 free(tp);
e0a1988b 1988 }
ff11d54c 1989}
e0a1988b 1990
3fe0b570 1991static void wait_tracers(void)
ff11d54c 1992{
3fe0b570 1993 struct list_head *p;
ff11d54c 1994
3fe0b570
AB
1995 if (use_tracer_devpaths())
1996 process_trace_bufs();
1997
df81fdb5
AB
1998 wait_tracers_leaving();
1999
3fe0b570
AB
2000 __list_for_each(p, &tracers) {
2001 int ret;
2002 struct tracer *tp = list_entry(p, struct tracer, head);
2003
3fe0b570
AB
2004 ret = pthread_join(tp->thread, NULL);
2005 if (ret)
2006 fprintf(stderr, "Thread join %d failed %d\n",
2007 tp->cpu, ret);
ff11d54c
TZ
2008 }
2009
3fe0b570
AB
2010 if (use_tracer_devpaths())
2011 clean_trace_bufs();
2012
2013 get_all_drops();
ff11d54c
TZ
2014}
2015
3fe0b570 2016static void exit_tracing(void)
ff11d54c 2017{
3fe0b570
AB
2018 signal(SIGINT, SIG_IGN);
2019 signal(SIGHUP, SIG_IGN);
2020 signal(SIGTERM, SIG_IGN);
2021 signal(SIGALRM, SIG_IGN);
2022
2023 stop_tracers();
2024 wait_tracers();
2025 del_tracers();
2026 rel_devpaths();
e0a1988b
JA
2027}
2028
3fe0b570 2029static void handle_sigint(__attribute__((__unused__)) int sig)
8e86c98a 2030{
3fe0b570
AB
2031 done = 1;
2032 stop_tracers();
8e86c98a
JA
2033}
2034
3fe0b570 2035static void show_stats(struct list_head *devpaths)
659bcc3f 2036{
3fe0b570
AB
2037 FILE *ofp;
2038 struct list_head *p;
2039 unsigned long long nevents, data_read;
2040 unsigned long long total_drops = 0;
2041 unsigned long long total_events = 0;
2042
2043 if (piped_output)
2044 ofp = my_fopen("/dev/null", "w");
2045 else
2046 ofp = stdout;
ff11d54c 2047
3fe0b570
AB
2048 __list_for_each(p, devpaths) {
2049 int cpu;
2050 struct pdc_stats *sp;
2051 struct devpath *dpp = list_entry(p, struct devpath, head);
e0a1988b 2052
3fe0b570
AB
2053 if (net_mode == Net_server)
2054 printf("server: end of run for %s:%s\n",
2055 dpp->ch->hostname, dpp->buts_name);
e0a1988b 2056
3fe0b570
AB
2057 data_read = 0;
2058 nevents = 0;
2059
2060 fprintf(ofp, "=== %s ===\n", dpp->buts_name);
2061 for (cpu = 0, sp = dpp->stats; cpu < dpp->ncpus; cpu++, sp++) {
2062 /*
2063 * Estimate events if not known...
2064 */
2065 if (sp->nevents == 0) {
2066 sp->nevents = sp->data_read /
2067 sizeof(struct blk_io_trace);
ff11d54c 2068 }
e0a1988b 2069
3fe0b570
AB
2070 fprintf(ofp,
2071 " CPU%3d: %20llu events, %8llu KiB data\n",
2072 cpu, sp->nevents, (sp->data_read + 1023) >> 10);
e0a1988b 2073
3fe0b570
AB
2074 data_read += sp->data_read;
2075 nevents += sp->nevents;
e0a1988b
JA
2076 }
2077
3fe0b570
AB
2078 fprintf(ofp, " Total: %20llu events (dropped %llu),"
2079 " %8llu KiB data\n", nevents,
2080 dpp->drops, (data_read + 1023) >> 10);
8e86c98a 2081
3fe0b570
AB
2082 total_drops += dpp->drops;
2083 total_events += (nevents + dpp->drops);
8e86c98a
JA
2084 }
2085
3fe0b570
AB
2086 fflush(ofp);
2087 if (piped_output)
2088 fclose(ofp);
8e86c98a 2089
3fe0b570
AB
2090 if (total_drops) {
2091 double drops_ratio = 1.0;
8e86c98a 2092
3fe0b570
AB
2093 if (total_events)
2094 drops_ratio = (double)total_drops/(double)total_events;
8e86c98a 2095
3fe0b570
AB
2096 fprintf(stderr, "\nYou have %llu (%5.1lf%%) dropped events\n"
2097 "Consider using a larger buffer size (-b) "
2098 "and/or more buffers (-n)\n",
2099 total_drops, 100.0 * drops_ratio);
8e86c98a 2100 }
8e86c98a
JA
2101}
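
/*
 * Worked example for show_stats(): with no explicit event count, a CPU
 * that read 1 MiB of trace data is reported as 1048576 / 48 = 21845
 * events (assuming the usual 48-byte struct blk_io_trace and ignoring any
 * pdu payload), and its data column as (1048576 + 1023) >> 10 = 1024 KiB.
 */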
2102
3fe0b570 2103static int handle_args(int argc, char *argv[])
8e86c98a 2104{
3fe0b570 2105 int c, i;
e3e74029 2106 struct statfs st;
d39c04ca
AB
2107 int act_mask_tmp = 0;
2108
2109 while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) >= 0) {
2110 switch (c) {
2111 case 'a':
2112 i = find_mask_map(optarg);
2113 if (i < 0) {
3fe0b570 2114 fprintf(stderr, "Invalid action mask %s\n",
d39c04ca 2115 optarg);
7425d456 2116 return 1;
d39c04ca
AB
2117 }
2118 act_mask_tmp |= i;
2119 break;
2120
2121 case 'A':
3fe0b570 2122 if ((sscanf(optarg, "%x", &i) != 1) ||
98f8386b 2123 !valid_act_opt(i)) {
d39c04ca 2124 fprintf(stderr,
ab197ca7 2125 "Invalid set action mask %s/0x%x\n",
d39c04ca 2126 optarg, i);
7425d456 2127 return 1;
d39c04ca
AB
2128 }
2129 act_mask_tmp = i;
2130 break;
d0ca268b 2131
d39c04ca 2132 case 'd':
3fe0b570 2133 if (add_devpath(optarg) != 0)
e7c9f3ff 2134 return 1;
d39c04ca
AB
2135 break;
2136
cf1edb17
AB
2137 case 'I': {
2138 char dev_line[256];
3fe0b570 2139 FILE *ifp = my_fopen(optarg, "r");
cf1edb17
AB
2140
2141 if (!ifp) {
3fe0b570
AB
2142 fprintf(stderr,
2143 "Invalid file for devices %s\n",
cf1edb17
AB
2144 optarg);
2145 return 1;
2146 }
2147
f9a89a6f
ES
2148 while (fscanf(ifp, "%s\n", dev_line) == 1) {
2149 if (add_devpath(dev_line) != 0) {
2150 fclose(ifp);
cf1edb17 2151 return 1;
f9a89a6f
ES
2152 }
2153 }
2154 fclose(ifp);
cf1edb17
AB
2155 break;
2156 }
cf1edb17 2157
5270dddd 2158 case 'r':
3d06efea 2159 debugfs_path = optarg;
5270dddd
JA
2160 break;
2161
d5396421 2162 case 'o':
66efebf8 2163 output_name = optarg;
d5396421 2164 break;
bc39777c
JA
2165 case 'k':
2166 kill_running_trace = 1;
2167 break;
ece238a6
NS
2168 case 'w':
2169 stop_watch = atoi(optarg);
2170 if (stop_watch <= 0) {
2171 fprintf(stderr,
2172 "Invalid stopwatch value (%d secs)\n",
2173 stop_watch);
2174 return 1;
2175 }
2176 break;
57ea8602 2177 case 'V':
5d4f19d9 2178 case 'v':
52724a0e 2179 printf("%s version %s\n", argv[0], blktrace_version);
3fe0b570
AB
2180 exit(0);
2181 /*NOTREACHED*/
129aa440 2182 case 'b':
eb3c8108 2183 buf_size = strtoul(optarg, NULL, 10);
183a0855 2184 if (buf_size <= 0 || buf_size > 16*1024) {
3fe0b570
AB
2185 fprintf(stderr, "Invalid buffer size (%lu)\n",
2186 buf_size);
129aa440
JA
2187 return 1;
2188 }
2189 buf_size <<= 10;
2190 break;
2191 case 'n':
eb3c8108 2192 buf_nr = strtoul(optarg, NULL, 10);
129aa440
JA
2193 if (buf_nr <= 0) {
2194 fprintf(stderr,
eb3c8108 2195 "Invalid buffer nr (%lu)\n", buf_nr);
129aa440
JA
2196 return 1;
2197 }
2198 break;
d1d7f15f
JA
2199 case 'D':
2200 output_dir = optarg;
2201 break;
8e86c98a
JA
2202 case 'h':
2203 net_mode = Net_client;
d324757e
ES
2204 memset(hostname, 0, sizeof(hostname));
2205 strncpy(hostname, optarg, sizeof(hostname));
2206 hostname[sizeof(hostname) - 1] = '\0';
8e86c98a
JA
2207 break;
2208 case 'l':
2209 net_mode = Net_server;
2210 break;
2211 case 'p':
2212 net_port = atoi(optarg);
2213 break;
32f18c48 2214 case 's':
79971f43 2215 net_use_sendfile = 0;
32f18c48 2216 break;
d39c04ca 2217 default:
ee1f4158 2218 show_usage(argv[0]);
3fe0b570
AB
2219 exit(1);
2220 /*NOTREACHED*/
d39c04ca
AB
2221 }
2222 }
2223
3fe0b570
AB
2224 while (optind < argc)
2225 if (add_devpath(argv[optind++]) != 0)
2226 return 1;
8e86c98a 2227
3fe0b570
AB
2228 if (net_mode != Net_server && ndevs == 0) {
2229 show_usage(argv[0]);
2230 return 1;
2231 }
8e86c98a 2232
d8365957 2233 if (statfs(debugfs_path, &st) < 0) {
3fe0b570
AB
2234 fprintf(stderr, "Invalid debug path %s: %d/%s\n",
2235 debugfs_path, errno, strerror(errno));
2236 return 1;
2237 }
2238
d8365957
MP
2239 if (st.f_type != (long)DEBUGFS_TYPE) {
2240 fprintf(stderr, "Debugfs is not mounted at %s\n", debugfs_path);
2241 return 1;
2242 }
2243
3fe0b570
AB
2244 if (act_mask_tmp != 0)
2245 act_mask = act_mask_tmp;
2246
e58f3937
AB
2247 if (net_mode == Net_client && net_setup_addr())
2248 return 1;
2249
3fe0b570
AB
2250 /*
2251 * Set up for appropriate PFD handler based upon output name.
2252 */
2253 if (net_client_use_sendfile())
2254 handle_pfds = handle_pfds_netclient;
2255 else if (net_client_use_send())
2256 handle_pfds = handle_pfds_entries;
2257 else if (output_name && (strcmp(output_name, "-") == 0)) {
2258 piped_output = 1;
2259 handle_pfds = handle_pfds_entries;
2260 pfp = stdout;
ae2dc05e
ES
2261 if (setvbuf(pfp, NULL, _IONBF, 0)) {
2262 perror("setvbuf stdout");
2263 return 1;
2264 }
3fe0b570
AB
2265 } else
2266 handle_pfds = handle_pfds_file;
2267 return 0;
2268}
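
/*
 * Example command lines exercising the option handling above
 * (illustrative):
 *
 *	blktrace -d /dev/sdb -o - | blkparse -i -   piped output (handle_pfds_entries)
 *	blktrace -d /dev/sdb -b 1024 -n 8           1 MiB sub-buffers, 8 per CPU
 *	blktrace -I devices.txt -D /tmp/traces      device list from a file, output dir
 *	blktrace -l                                 network server mode
 *	blktrace -d /dev/sdb -h tracehost           network client (sendfile unless -s)
 */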
2269
2270static void ch_add_connection(struct net_server_s *ns, struct cl_host *ch,
2271 int fd)
2272{
2273 struct cl_conn *nc;
2274
2275 nc = malloc(sizeof(*nc));
2276 memset(nc, 0, sizeof(*nc));
2277
2278 time(&nc->connect_time);
2279 nc->ch = ch;
2280 nc->fd = fd;
2281 nc->ncpus = -1;
2282
2283 list_add_tail(&nc->ch_head, &ch->conn_list);
2284 ch->connects++;
2285
2286 list_add_tail(&nc->ns_head, &ns->conn_list);
2287 ns->connects++;
2288 ns->pfds = realloc(ns->pfds, (ns->connects+1) * sizeof(struct pollfd));
2289}
2290
2291static void ch_rem_connection(struct net_server_s *ns, struct cl_host *ch,
2292 struct cl_conn *nc)
2293{
2294 net_close_connection(&nc->fd);
2295
2296 list_del(&nc->ch_head);
2297 ch->connects--;
2298
2299 list_del(&nc->ns_head);
2300 ns->connects--;
2301 ns->pfds = realloc(ns->pfds, (ns->connects+1) * sizeof(struct pollfd));
2302
2303 free(nc);
2304}
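
/*
 * ns->pfds is kept at ns->connects + 1 entries by the two helpers above:
 * slot 0 is always the listening socket and slots 1..connects mirror
 * ns->conn_list (see net_setup_pfds() below), so handle_client_data() can
 * walk the connection list and the pollfd array in lockstep.
 */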
2305
2306static struct cl_host *net_find_client_host(struct net_server_s *ns,
2307 struct in_addr cl_in_addr)
2308{
2309 struct list_head *p;
2310
2311 __list_for_each(p, &ns->ch_list) {
2312 struct cl_host *ch = list_entry(p, struct cl_host, head);
2313
2314 if (in_addr_eq(ch->cl_in_addr, cl_in_addr))
2315 return ch;
2316 }
2317
2318 return NULL;
2319}
2320
2321static struct cl_host *net_add_client_host(struct net_server_s *ns,
2322 struct sockaddr_in *addr)
2323{
2324 struct cl_host *ch;
2325
2326 ch = malloc(sizeof(*ch));
2327 memset(ch, 0, sizeof(*ch));
2328
2329 ch->ns = ns;
2330 ch->cl_in_addr = addr->sin_addr;
2331 list_add_tail(&ch->head, &ns->ch_list);
2332 ns->nchs++;
ec685dd2 2333
3fe0b570
AB
2334 ch->hostname = strdup(inet_ntoa(addr->sin_addr));
2335 printf("server: connection from %s\n", ch->hostname);
2336
2337 INIT_LIST_HEAD(&ch->conn_list);
2338 INIT_LIST_HEAD(&ch->devpaths);
2339
2340 return ch;
2341}
2342
2343static void device_done(struct devpath *dpp, int ncpus)
2344{
2345 int cpu;
2346 struct io_info *iop;
2347
2348 for (cpu = 0, iop = dpp->ios; cpu < ncpus; cpu++, iop++)
2349 close_iop(iop);
2350
2351 list_del(&dpp->head);
2352 dpp_free(dpp);
2353}
2354
2355static void net_ch_remove(struct cl_host *ch, int ncpus)
2356{
2357 struct list_head *p, *q;
2358 struct net_server_s *ns = ch->ns;
2359
2360 list_for_each_safe(p, q, &ch->devpaths) {
2361 struct devpath *dpp = list_entry(p, struct devpath, head);
2362 device_done(dpp, ncpus);
ec685dd2 2363 }
8e86c98a 2364
3fe0b570
AB
2365 list_for_each_safe(p, q, &ch->conn_list) {
2366 struct cl_conn *nc = list_entry(p, struct cl_conn, ch_head);
2367
2368 ch_rem_connection(ns, ch, nc);
22cd0c02
JA
2369 }
2370
3fe0b570
AB
2371 list_del(&ch->head);
2372 ns->nchs--;
2373
2374 if (ch->hostname)
2375 free(ch->hostname);
2376 free(ch);
2377}
2378
2379static void net_add_connection(struct net_server_s *ns)
2380{
2381 int fd;
2382 struct cl_host *ch;
2383 socklen_t socklen = sizeof(ns->addr);
2384
d5302b03 2385 fd = my_accept(ns->listen_fd, (struct sockaddr *)&ns->addr, &socklen);
3fe0b570
AB
2386 if (fd < 0) {
2387 /*
2388 * This is OK: we just won't accept this connection,
2389 * nothing fatal.
2390 */
2391 perror("accept");
2392 } else {
2393 ch = net_find_client_host(ns, ns->addr.sin_addr);
2394 if (!ch)
2395 ch = net_add_client_host(ns, &ns->addr);
2396
2397 ch_add_connection(ns, ch, fd);
d39c04ca 2398 }
3fe0b570 2399}
d39c04ca 2400
3fe0b570
AB
2401static struct devpath *nc_add_dpp(struct cl_conn *nc,
2402 struct blktrace_net_hdr *bnh,
2403 time_t connect_time)
2404{
2405 int cpu;
2406 struct io_info *iop;
2407 struct devpath *dpp;
2408
2409 dpp = malloc(sizeof(*dpp));
2410 memset(dpp, 0, sizeof(*dpp));
2411
2412 dpp->buts_name = strdup(bnh->buts_name);
2413 dpp->path = strdup(bnh->buts_name);
2414 dpp->fd = -1;
2415 dpp->ch = nc->ch;
2416 dpp->cl_id = bnh->cl_id;
2417 dpp->cl_connect_time = connect_time;
2418 dpp->ncpus = nc->ncpus;
2419 dpp->stats = calloc(dpp->ncpus, sizeof(*dpp->stats));
2420 memset(dpp->stats, 0, dpp->ncpus * sizeof(*dpp->stats));
2421
2422 list_add_tail(&dpp->head, &nc->ch->devpaths);
2423 nc->ch->ndevs++;
2424
2425 dpp->ios = calloc(nc->ncpus, sizeof(*iop));
2426 memset(dpp->ios, 0, nc->ncpus * sizeof(*iop));
2427
2428 for (cpu = 0, iop = dpp->ios; cpu < nc->ncpus; cpu++, iop++) {
2429 iop->dpp = dpp;
2430 iop->nc = nc;
2431 init_mmap_info(&iop->mmap_info);
2432
2433 if (iop_open(iop, cpu))
2434 goto err;
69dd57c2
AB
2435 }
2436
3fe0b570 2437 return dpp;
69dd57c2 2438
3fe0b570
AB
2439err:
2440 /*
2441 * Need to unravel what's been done...
2442 */
2443 while (cpu >= 0)
2444 close_iop(&dpp->ios[cpu--]);
2445 dpp_free(dpp);
2446
2447 return NULL;
2448}
d0ca268b 2449
3fe0b570
AB
2450static struct devpath *nc_find_dpp(struct cl_conn *nc,
2451 struct blktrace_net_hdr *bnh)
2452{
2453 struct list_head *p;
2454 time_t connect_time = nc->connect_time;
3d06efea 2455
3fe0b570
AB
2456 __list_for_each(p, &nc->ch->devpaths) {
2457 struct devpath *dpp = list_entry(p, struct devpath, head);
2458
2459 if (!strcmp(dpp->buts_name, bnh->buts_name))
2460 return dpp;
2461
2462 if (dpp->cl_id == bnh->cl_id)
2463 connect_time = dpp->cl_connect_time;
d0ca268b
JA
2464 }
2465
3fe0b570
AB
2466 return nc_add_dpp(nc, bnh, connect_time);
2467}
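
/*
 * nc_find_dpp(): within one client host a device is identified by its
 * buts_name; if no match is found a new devpath is created.  A matching
 * cl_id from an earlier device of the same run reuses that run's connect
 * time, so data belonging to one run keeps a single connect timestamp
 * even if it arrives over several connections.
 */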
bc39777c 2468
3fe0b570
AB
2469static void net_client_read_data(struct cl_conn *nc, struct devpath *dpp,
2470 struct blktrace_net_hdr *bnh)
2471{
2472 int ret;
2473 struct io_info *iop = &dpp->ios[bnh->cpu];
2474 struct mmap_info *mip = &iop->mmap_info;
2475
ae7c049d 2476 if (setup_mmap(iop->ofd, bnh->len, &iop->mmap_info, NULL)) {
3fe0b570
AB
2477 fprintf(stderr, "ncd(%s:%d): mmap failed\n",
2478 nc->ch->hostname, nc->fd);
2479 exit(1);
2480 }
2481
2482 ret = net_recv_data(nc->fd, mip->fs_buf + mip->fs_off, bnh->len);
2483 if (ret > 0) {
2484 pdc_dr_update(dpp, bnh->cpu, ret);
2485 mip->fs_size += ret;
2486 mip->fs_off += ret;
2487 } else if (ret < 0)
2488 exit(1);
2489}
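
/*
 * net_client_read_data(): each (device, cpu) pair has its own io_info
 * with an output file and an mmap window; setup_mmap() grows the window
 * as needed and net_recv_data() copies bnh->len bytes straight into it,
 * so the received trace data lands directly in the mapped output file.
 */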
2490
2491/*
2492 * Returns 1 if we closed a host - invalidates other polling information
2493 * that may be present.
2494 */
2495static int net_client_data(struct cl_conn *nc)
2496{
2497 int ret;
2498 struct devpath *dpp;
2499 struct blktrace_net_hdr bnh;
2500
2501 ret = net_get_header(nc, &bnh);
2502 if (ret == 0)
7425d456 2503 return 0;
3fe0b570
AB
2504
2505 if (ret < 0) {
2506 fprintf(stderr, "ncd(%d): header read failed\n", nc->fd);
2507 exit(1);
2508 }
2509
2510 if (data_is_native == -1 && check_data_endianness(bnh.magic)) {
2511 fprintf(stderr, "ncd(%d): received data is bad\n", nc->fd);
2512 exit(1);
2513 }
2514
2515 if (!data_is_native) {
2516 bnh.magic = be32_to_cpu(bnh.magic);
2517 bnh.cpu = be32_to_cpu(bnh.cpu);
2518 bnh.max_cpus = be32_to_cpu(bnh.max_cpus);
2519 bnh.len = be32_to_cpu(bnh.len);
2520 bnh.cl_id = be32_to_cpu(bnh.cl_id);
2521 bnh.buf_size = be32_to_cpu(bnh.buf_size);
2522 bnh.buf_nr = be32_to_cpu(bnh.buf_nr);
2523 bnh.page_size = be32_to_cpu(bnh.page_size);
2524 }
2525
2526 if ((bnh.magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) {
2527 fprintf(stderr, "ncd(%s:%d): bad data magic\n",
2528 nc->ch->hostname, nc->fd);
2529 exit(1);
2530 }
2531
2532 if (nc->ncpus == -1)
2533 nc->ncpus = bnh.max_cpus;
2534
2535 /*
2536 * len == 0 means the other end is sending us a new connection/dpp
2537 * len == 1 means that the other end signalled end-of-run
2538 */
2539 dpp = nc_find_dpp(nc, &bnh);
2540 if (bnh.len == 0) {
2541 /*
2542 * Just adding in the dpp above is enough
2543 */
2544 ack_open_close(nc->fd, dpp->buts_name);
2545 nc->ch->cl_opens++;
2546 } else if (bnh.len == 1) {
2547 /*
2548 * overload cpu count with dropped events
2549 */
2550 dpp->drops = bnh.cpu;
2551
2552 ack_open_close(nc->fd, dpp->buts_name);
2553 if (--nc->ch->cl_opens == 0) {
2554 show_stats(&nc->ch->devpaths);
2555 net_ch_remove(nc->ch, nc->ncpus);
2556 return 1;
2557 }
2558 } else
2559 net_client_read_data(nc, dpp, &bnh);
2560
2561 return 0;
2562}
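
/*
 * Example exchange seen by net_client_data() for one device on a two-CPU
 * client (illustrative):
 *
 *	len == 0              open of "sdb": devpath created, ack sent, cl_opens++
 *	len == N, cpu == 0|1  N bytes of trace data for that CPU
 *	len == 1, cpu == D    end of run: D dropped events recorded, ack sent,
 *	                      host torn down once cl_opens drops to zero
 */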
2563
2564static void handle_client_data(struct net_server_s *ns, int events)
2565{
2566 struct cl_conn *nc;
2567 struct pollfd *pfd;
2568 struct list_head *p, *q;
2569
2570 pfd = &ns->pfds[1];
2571 list_for_each_safe(p, q, &ns->conn_list) {
2572 if (pfd->revents & POLLIN) {
2573 nc = list_entry(p, struct cl_conn, ns_head);
2574
2575 if (net_client_data(nc) || --events == 0)
2576 break;
2577 }
2578 pfd++;
2579 }
2580}
2581
2582static void net_setup_pfds(struct net_server_s *ns)
2583{
2584 struct pollfd *pfd;
2585 struct list_head *p;
2586
2587 ns->pfds[0].fd = ns->listen_fd;
2588 ns->pfds[0].events = POLLIN;
2589
2590 pfd = &ns->pfds[1];
2591 __list_for_each(p, &ns->conn_list) {
2592 struct cl_conn *nc = list_entry(p, struct cl_conn, ns_head);
2593
2594 pfd->fd = nc->fd;
2595 pfd->events = POLLIN;
2596 pfd++;
2597 }
2598}
2599
2600static int net_server_handle_connections(struct net_server_s *ns)
2601{
2602 int events;
2603
2604 printf("server: waiting for connections...\n");
2605
2606 while (!done) {
2607 net_setup_pfds(ns);
2608 events = poll(ns->pfds, ns->connects + 1, -1);
2609 if (events < 0) {
2610 if (errno != EINTR) {
2611 perror("FATAL: poll error");
2612 return 1;
2613 }
2614 } else if (events > 0) {
2615 if (ns->pfds[0].revents & POLLIN) {
2616 net_add_connection(ns);
2617 events--;
2618 }
2619
2620 if (events)
2621 handle_client_data(ns, events);
2622 }
2623 }
2624
2625 return 0;
2626}
2627
2628static int net_server(void)
2629{
2630 int fd, opt;
2631 int ret = 1;
2632 struct net_server_s net_server;
2633 struct net_server_s *ns = &net_server;
2634
2635 memset(ns, 0, sizeof(*ns));
2636 INIT_LIST_HEAD(&ns->ch_list);
2637 INIT_LIST_HEAD(&ns->conn_list);
2638 ns->pfds = malloc(sizeof(struct pollfd));
2639
2640 fd = my_socket(AF_INET, SOCK_STREAM, 0);
2641 if (fd < 0) {
2642 perror("server: socket");
2643 goto out;
2644 }
2645
2646 opt = 1;
2647 if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) < 0) {
2648 perror("setsockopt");
2649 goto out;
2650 }
2651
2652 memset(&ns->addr, 0, sizeof(ns->addr));
2653 ns->addr.sin_family = AF_INET;
2654 ns->addr.sin_addr.s_addr = htonl(INADDR_ANY);
2655 ns->addr.sin_port = htons(net_port);
2656
2657 if (bind(fd, (struct sockaddr *) &ns->addr, sizeof(ns->addr)) < 0) {
2658 perror("bind");
2659 goto out;
2660 }
2661
2662 if (listen(fd, 1) < 0) {
2663 perror("listen");
2664 goto out;
2665 }
2666
2667 /*
2668 * The actual server looping is done here:
2669 */
2670 ns->listen_fd = fd;
2671 ret = net_server_handle_connections(ns);
2672
2673 /*
2674 * Clean up and return...
2675 */
2676out:
2677 free(ns->pfds);
2678 return ret;
2679}
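
/*
 * Minimal server/client pairing (illustrative): run "blktrace -l" on the
 * collecting host and "blktrace -d /dev/sdb -h <collector>" on the traced
 * host; both sides must use the same -p port if the default is
 * overridden.
 */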
2680
055cc3e5
AB
2681static int run_tracers(void)
2682{
2683 atexit(exit_tracing);
2684 if (net_mode == Net_client)
2685 printf("blktrace: connecting to %s\n", hostname);
2686
ab6809de
JA
2687 if (setup_buts())
2688 return 1;
055cc3e5
AB
2689
2690 if (use_tracer_devpaths()) {
2691 if (setup_tracer_devpaths())
2692 return 1;
2693
2694 if (piped_output)
2695 handle_list = handle_list_file;
2696 else
2697 handle_list = handle_list_net;
2698 }
2699
2700 start_tracers();
2701 if (nthreads_running == ncpus) {
2702 unblock_tracers();
2703 start_buts();
2704 if (net_mode == Net_client)
2705 printf("blktrace: connected!\n");
2706 if (stop_watch)
2707 alarm(stop_watch);
2708 } else
2709 stop_tracers();
2710
2711 wait_tracers();
2712 if (nthreads_running == ncpus)
2713 show_stats(&devpaths);
2714 if (net_client_use_send())
2715 close_client_connections();
2716 del_tracers();
2717
2718 return 0;
2719}
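
/*
 * run_tracers() only calls start_buts() - BLKTRACESTART on every device -
 * once all ncpus tracer threads are confirmed running; if any thread
 * failed to come up, the run is aborted via stop_tracers() rather than
 * tracing with partial CPU coverage, and no stats are printed.
 */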
2720
d045a704
JK
2721static cpu_set_t *get_online_cpus(void)
2722{
2723 FILE *cpus;
2724 cpu_set_t *set;
2725 size_t alloc_size;
2726 int cpuid, prevcpuid = -1;
2727 char nextch;
2728 int n, ncpu, curcpu = 0;
2729 int *cpu_nums;
2730
2731 ncpu = sysconf(_SC_NPROCESSORS_CONF);
2732 if (ncpu < 0)
2733 return NULL;
2734
2735 cpu_nums = malloc(sizeof(int)*ncpu);
2736 if (!cpu_nums) {
2737 errno = ENOMEM;
2738 return NULL;
2739 }
2740
2741 /*
2742 * There is no easy way to get the maximum CPU number, so we have to
2743 * parse the file first to find it out and then create an appropriately
2744 * sized cpuset.
2745 */
2746 cpus = my_fopen("/sys/devices/system/cpu/online", "r");
2747 for (;;) {
2748 n = fscanf(cpus, "%d%c", &cpuid, &nextch);
2749 if (n <= 0)
2750 break;
2751 if (n == 2 && nextch == '-') {
2752 prevcpuid = cpuid;
2753 continue;
2754 }
2755 if (prevcpuid == -1)
2756 prevcpuid = cpuid;
2757 while (prevcpuid <= cpuid) {
2758 /* More CPUs listed than configured? */
2759 if (curcpu >= ncpu) {
2760 errno = EINVAL;
2761 return NULL;
2762 }
2763 cpu_nums[curcpu++] = prevcpuid++;
2764 }
2765 prevcpuid = -1;
2766 }
2767 fclose(cpus);
2768
2769 ncpu = curcpu;
2770 max_cpus = cpu_nums[ncpu - 1] + 1;
2771
2772 /* Now that we have maximum cpu number, create a cpuset */
2773 set = CPU_ALLOC(max_cpus);
2774 if (!set) {
2775 errno = ENOMEM;
2776 return NULL;
2777 }
2778 alloc_size = CPU_ALLOC_SIZE(max_cpus);
2779 CPU_ZERO_S(alloc_size, set);
2780
2781 for (curcpu = 0; curcpu < ncpu; curcpu++)
2782 CPU_SET_S(cpu_nums[curcpu], alloc_size, set);
2783
2784 free(cpu_nums);
2785
2786 return set;
2787}
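
/*
 * Example for get_online_cpus(): a machine with CPUs 0-3 and 6 online
 * reports "0-3,6" in /sys/devices/system/cpu/online; the parser above
 * expands that to cpu_nums[] = { 0, 1, 2, 3, 6 }, ncpu = 5 and
 * max_cpus = 7, so the returned cpuset is sized for CPU ids 0..6 and
 * start_tracers() later skips the offline ids 4 and 5.
 */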
2788
3fe0b570
AB
2789int main(int argc, char *argv[])
2790{
2791 int ret = 0;
2792
2793 setlocale(LC_NUMERIC, "en_US");
2794 pagesize = getpagesize();
d045a704
JK
2795 online_cpus = get_online_cpus();
2796 if (!online_cpus) {
2797 fprintf(stderr, "cannot get online cpus %d/%s\n",
3fe0b570
AB
2798 errno, strerror(errno));
2799 ret = 1;
2800 goto out;
055cc3e5 2801 } else if (handle_args(argc, argv)) {
3fe0b570
AB
2802 ret = 1;
2803 goto out;
bc39777c
JA
2804 }
2805
d045a704 2806 ncpus = CPU_COUNT_S(CPU_ALLOC_SIZE(max_cpus), online_cpus);
ce2151eb
AB
2807 if (ndevs > 1 && output_name && strcmp(output_name, "-") != 0) {
2808 fprintf(stderr, "-o not supported with multiple devices\n");
2809 ret = 1;
2810 goto out;
2811 }
2812
d0ca268b
JA
2813 signal(SIGINT, handle_sigint);
2814 signal(SIGHUP, handle_sigint);
2815 signal(SIGTERM, handle_sigint);
ece238a6 2816 signal(SIGALRM, handle_sigint);
38e1f0c6 2817 signal(SIGPIPE, SIG_IGN);
d0ca268b 2818
3fe0b570
AB
2819 if (kill_running_trace) {
2820 struct devpath *dpp;
2821 struct list_head *p;
8e86c98a 2822
3fe0b570
AB
2823 __list_for_each(p, &devpaths) {
2824 dpp = list_entry(p, struct devpath, head);
2825 if (__stop_trace(dpp->fd)) {
2826 fprintf(stderr,
2827 "BLKTRACETEARDOWN %s failed: %d/%s\n",
2828 dpp->path, errno, strerror(errno));
2829 }
2830 }
2831 } else if (net_mode == Net_server) {
2832 if (output_name) {
2833 fprintf(stderr, "-o ignored in server mode\n");
2834 output_name = NULL;
2835 }
3fe0b570 2836 ret = net_server();
055cc3e5
AB
2837 } else
2838 ret = run_tracers();
d0ca268b 2839
3fe0b570
AB
2840out:
2841 if (pfp)
2842 fclose(pfp);
2843 rel_devpaths();
2844 return ret;
2845}