blktrace: Reorganize creation of output file name
[blktrace.git] / blktrace.c
CommitLineData
d0ca268b
JA
1/*
2 * block queue tracing application
3 *
d956a2cd 4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
46e37c55 5 * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
d956a2cd 6 *
3fe0b570
AB
7 * Rewrite to have a single thread per CPU (managing all devices on that CPU)
8 * Alan D. Brunelle <alan.brunelle@hp.com> - January 2009
9 *
d956a2cd
JA
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 *
d0ca268b 24 */
3fe0b570
AB
25
26#include <errno.h>
27#include <stdarg.h>
28#include <stdio.h>
29#include <stdlib.h>
30#include <string.h>
31#include <fcntl.h>
32#include <getopt.h>
33#include <sched.h>
d0ca268b 34#include <unistd.h>
3fe0b570 35#include <poll.h>
d0ca268b 36#include <signal.h>
3fe0b570
AB
37#include <pthread.h>
38#include <locale.h>
d0ca268b 39#include <sys/ioctl.h>
3fe0b570
AB
40#include <sys/types.h>
41#include <sys/stat.h>
42#include <sys/vfs.h>
b7106311 43#include <sys/mman.h>
3fe0b570
AB
44#include <sys/param.h>
45#include <sys/time.h>
46#include <sys/resource.h>
8e86c98a 47#include <sys/socket.h>
8e86c98a
JA
48#include <netinet/in.h>
49#include <arpa/inet.h>
50#include <netdb.h>
32f18c48 51#include <sys/sendfile.h>
d0ca268b 52
3fe0b570 53#include "btt/list.h"
d0ca268b 54#include "blktrace.h"
52724a0e 55
8f551a39
JA
/*
 * Default relay sub-buffer geometry.  You may want to increase these even
 * more if you are logging at a high rate and see skipped/missed events.
 */
#define BUF_SIZE		(512 * 1024)
#define BUF_NR			(4)

#define FILE_VBUF_SIZE		(128 * 1024)

#define DEBUGFS_TYPE		(0x64626720)	/* bytes spell "dbg " in ASCII */
#define TRACE_NET_PORT		(8462)		/* default for net_port */

/* Network role of this process; Net_none is the zero value */
enum {
	Net_none = 0,
	Net_server,
	Net_client,
};

/* State a tracer thread reports through tracer_signal_ready() */
enum thread_status {
	Th_running,
	Th_leaving,
	Th_error
};
79
3fe0b570
AB
/*
 * Generic per-device, per-CPU counters.  nevents can be _roughly_
 * estimated from data_read (discounting pdu...).
 *
 * Both fields are only updated through pdc_dr_update() and
 * pdc_nev_update() below.
 */
struct pdc_stats {
	unsigned long long data_read;	/* accumulated by pdc_dr_update() */
	unsigned long long nevents;	/* accumulated by pdc_nev_update() */
};
90
91struct devpath {
92 struct list_head head;
93 char *path; /* path to device special file */
94 char *buts_name; /* name returned from bt kernel code */
95 struct pdc_stats *stats;
70598a36 96 int fd, ncpus;
3fe0b570
AB
97 unsigned long long drops;
98
99 /*
100 * For piped output only:
101 *
102 * Each tracer will have a tracer_devpath_head that it will add new
103 * data onto. It's list is protected above (tracer_devpath_head.mutex)
104 * and it will signal the processing thread using the dp_cond,
105 * dp_mutex & dp_entries variables above.
106 */
107 struct tracer_devpath_head *heads;
108
109 /*
110 * For network server mode only:
111 */
112 struct cl_host *ch;
113 u32 cl_id;
114 time_t cl_connect_time;
115 struct io_info *ios;
116};
117
118/*
119 * For piped output to stdout we will have each tracer thread (one per dev)
120 * tack buffers read from the relay queues on a per-device list.
121 *
122 * The main thread will then collect trace buffers from each of lists in turn.
123 *
124 * We will use a mutex to guard each of the trace_buf list. The tracers
125 * can then signal the main thread using <dp_cond,dp_mutex> and
126 * dp_entries. (When dp_entries is 0, and a tracer adds an entry it will
127 * signal. When dp_entries is 0, the main thread will wait for that condition
128 * to be signalled.)
129 *
130 * adb: It may be better just to have a large buffer per tracer per dev,
131 * and then use it as a ring-buffer. This would certainly cut down a lot
132 * of malloc/free thrashing, at the cost of more memory movements (potentially).
133 */
134struct trace_buf {
135 struct list_head head;
136 struct devpath *dpp;
137 void *buf;
138 int cpu, len;
139};
140
141struct tracer_devpath_head {
142 pthread_mutex_t mutex;
143 struct list_head head;
144 struct trace_buf *prev;
145};
146
/*
 * Book-keeping for the mmap() window over an output file (containing
 * traces).
 */
struct mmap_info {
	void *fs_buf;			/* current mapping, if any */
	unsigned long long fs_size;
	unsigned long long fs_max_size;	/* length the file is ftruncate()d to */
	unsigned long long fs_off;	/* offset into fs_buf */
	unsigned long long fs_buf_len;	/* length of the fs_buf mapping */
	unsigned long buf_size;
	unsigned long buf_nr;
	int pagesize;
};
156
157/*
158 * Each thread doing work on a (client) side of blktrace will have one
159 * of these. The ios array contains input/output information, pfds holds
160 * poll() data. The volatile's provide flags to/from the main executing
161 * thread.
162 */
163struct tracer {
164 struct list_head head;
165 struct io_info *ios;
166 struct pollfd *pfds;
167 pthread_t thread;
3fe0b570 168 int cpu, nios;
df81fdb5 169 volatile int status, is_done;
3fe0b570
AB
170};
171
172/*
173 * networking stuff follows. we include a magic number so we know whether
174 * to endianness convert or not.
175 *
176 * The len field is overloaded:
177 * 0 - Indicates an "open" - allowing the server to set up for a dev/cpu
178 * 1 - Indicates a "close" - Shut down connection orderly
179 *
180 * The cpu field is overloaded on close: it will contain the number of drops.
181 */
182struct blktrace_net_hdr {
183 u32 magic; /* same as trace magic */
184 char buts_name[32]; /* trace name */
185 u32 cpu; /* for which cpu */
186 u32 max_cpus;
187 u32 len; /* length of following trace data */
188 u32 cl_id; /* id for set of client per-cpu connections */
189 u32 buf_size; /* client buf_size for this trace */
190 u32 buf_nr; /* client buf_nr for this trace */
191 u32 page_size; /* client page_size for this trace */
192};
193
194/*
195 * Each host encountered has one of these. The head is used to link this
196 * on to the network server's ch_list. Connections associated with this
197 * host are linked on conn_list, and any devices traced on that host
198 * are connected on the devpaths list.
199 */
200struct cl_host {
201 struct list_head head;
202 struct list_head conn_list;
203 struct list_head devpaths;
204 struct net_server_s *ns;
205 char *hostname;
206 struct in_addr cl_in_addr;
207 int connects, ndevs, cl_opens;
208};
209
210/*
211 * Each connection (client to server socket ('fd')) has one of these. A
212 * back reference to the host ('ch'), and lists headers (for the host
213 * list, and the network server conn_list) are also included.
214 */
215struct cl_conn {
216 struct list_head ch_head, ns_head;
217 struct cl_host *ch;
218 int fd, ncpus;
219 time_t connect_time;
220};
221
222/*
223 * The network server requires some poll structures to be maintained -
224 * one per conection currently on conn_list. The nchs/ch_list values
225 * are for each host connected to this server. The addr field is used
226 * for scratch as new connections are established.
227 */
228struct net_server_s {
229 struct list_head conn_list;
230 struct list_head ch_list;
231 struct pollfd *pfds;
232 int listen_fd, connects, nchs;
233 struct sockaddr_in addr;
234};
235
236/*
237 * This structure is (generically) used to providide information
238 * for a read-to-write set of values.
239 *
240 * ifn & ifd represent input information
241 *
242 * ofn, ofd, ofp, obuf & mmap_info are used for output file (optionally).
243 */
244struct io_info {
245 struct devpath *dpp;
246 FILE *ofp;
247 char *obuf;
248 struct cl_conn *nc; /* Server network connection */
249
250 /*
251 * mmap controlled output files
252 */
253 struct mmap_info mmap_info;
254
255 /*
256 * Client network fields
257 */
258 unsigned int ready;
259 unsigned long long data_queued;
260
261 /*
262 * Input/output file descriptors & names
263 */
264 int ifd, ofd;
265 char ifn[MAXPATHLEN + 64];
266 char ofn[MAXPATHLEN + 64];
267};
268
269static char blktrace_version[] = "2.0.0";
270
271/*
272 * Linkage to blktrace helper routines (trace conversions)
273 */
274int data_is_native = -1;
275
055cc3e5 276static int ndevs;
d045a704 277static int max_cpus;
3fe0b570 278static int ncpus;
d045a704 279static cpu_set_t *online_cpus;
3fe0b570
AB
280static int pagesize;
281static int act_mask = ~0U;
055cc3e5
AB
282static int kill_running_trace;
283static int stop_watch;
284static int piped_output;
285
3fe0b570
AB
286static char *debugfs_path = "/sys/kernel/debug";
287static char *output_name;
288static char *output_dir;
055cc3e5 289
3fe0b570
AB
290static unsigned long buf_size = BUF_SIZE;
291static unsigned long buf_nr = BUF_NR;
055cc3e5
AB
292
293static FILE *pfp;
294
3fe0b570
AB
295static LIST_HEAD(devpaths);
296static LIST_HEAD(tracers);
055cc3e5 297
3fe0b570 298static volatile int done;
d0ca268b 299
6488ca48
AB
300/*
301 * tracer threads add entries, the main thread takes them off and processes
302 * them. These protect the dp_entries variable.
303 */
3fe0b570
AB
304static pthread_cond_t dp_cond = PTHREAD_COND_INITIALIZER;
305static pthread_mutex_t dp_mutex = PTHREAD_MUTEX_INITIALIZER;
306static volatile int dp_entries;
307
6488ca48 308/*
df81fdb5 309 * These synchronize master / thread interactions.
6488ca48 310 */
df81fdb5
AB
311static pthread_cond_t mt_cond = PTHREAD_COND_INITIALIZER;
312static pthread_mutex_t mt_mutex = PTHREAD_MUTEX_INITIALIZER;
313static volatile int nthreads_running;
314static volatile int nthreads_leaving;
315static volatile int nthreads_error;
316static volatile int tracers_run;
6488ca48 317
3fe0b570
AB
318/*
319 * network cmd line params
320 */
e58f3937 321static struct sockaddr_in hostname_addr;
3fe0b570
AB
322static char hostname[MAXHOSTNAMELEN];
323static int net_port = TRACE_NET_PORT;
324static int net_use_sendfile = 1;
325static int net_mode;
326static int *cl_fds;
007c233c 327
3fe0b570
AB
328static int (*handle_pfds)(struct tracer *, int, int);
329static int (*handle_list)(struct tracer_devpath_head *, struct list_head *);
e3e74029 330
/*
 * Short option string and the matching long option table handed to
 * getopt_long(). Each long option maps onto the short option letter in
 * .val; the table ends with an entry whose name is NULL.
 */
#define S_OPTS	"d:a:A:r:o:kw:vVb:n:D:lh:p:sI:"
static struct option l_opts[] = {
	{ .name = "dev",             .has_arg = required_argument, .flag = NULL, .val = 'd' },
	{ .name = "input-devs",      .has_arg = required_argument, .flag = NULL, .val = 'I' },
	{ .name = "act-mask",        .has_arg = required_argument, .flag = NULL, .val = 'a' },
	{ .name = "set-mask",        .has_arg = required_argument, .flag = NULL, .val = 'A' },
	{ .name = "relay",           .has_arg = required_argument, .flag = NULL, .val = 'r' },
	{ .name = "output",          .has_arg = required_argument, .flag = NULL, .val = 'o' },
	{ .name = "kill",            .has_arg = no_argument,       .flag = NULL, .val = 'k' },
	{ .name = "stopwatch",       .has_arg = required_argument, .flag = NULL, .val = 'w' },
	{ .name = "version",         .has_arg = no_argument,       .flag = NULL, .val = 'v' },
	{ .name = "version",         .has_arg = no_argument,       .flag = NULL, .val = 'V' },
	{ .name = "buffer-size",     .has_arg = required_argument, .flag = NULL, .val = 'b' },
	{ .name = "num-sub-buffers", .has_arg = required_argument, .flag = NULL, .val = 'n' },
	{ .name = "output-dir",      .has_arg = required_argument, .flag = NULL, .val = 'D' },
	{ .name = "listen",          .has_arg = no_argument,       .flag = NULL, .val = 'l' },
	{ .name = "host",            .has_arg = required_argument, .flag = NULL, .val = 'h' },
	{ .name = "port",            .has_arg = required_argument, .flag = NULL, .val = 'p' },
	{ .name = "no-sendfile",     .has_arg = no_argument,       .flag = NULL, .val = 's' },
	{ .name = NULL, }
};
439
b9a7e9fc
ES
/*
 * Usage text printed by show_usage(): a synopsis of every option followed
 * by a one-line description of each.
 */
static char usage_str[] = "\n\n"
	"-d <dev> | --dev=<dev>\n"
	"[ -r <debugfs path> | --relay=<debugfs path> ]\n"
	"[ -o <file> | --output=<file>]\n"
	"[ -D <dir> | --output-dir=<dir>]\n"
	"[ -w <time> | --stopwatch=<time>]\n"
	"[ -a <action field> | --act-mask=<action field>]\n"
	"[ -A <action mask> | --set-mask=<action mask>]\n"
	"[ -b <size> | --buffer-size=<size>]\n"
	"[ -n <number> | --num-sub-buffers=<number>]\n"
	"[ -l | --listen]\n"
	"[ -h <hostname> | --host=<hostname>]\n"
	"[ -p <port number> | --port=<port number>]\n"
	"[ -s | --no-sendfile]\n"
	"[ -I <devs file> | --input-devs=<devs file>]\n"
	"[ -v | --version]\n"
	"[ -V | --version]\n"

	"\t-d Use specified device. May also be given last after options\n"
	"\t-r Path to mounted debugfs, defaults to /sys/kernel/debug\n"
	"\t-o File(s) to send output to\n"
	"\t-D Directory to prepend to output file names\n"
	"\t-w Stop after defined time, in seconds\n"
	"\t-a Only trace specified actions. See documentation\n"
	"\t-A Give trace mask as a single value. See documentation\n"
	"\t-b Sub buffer size in KiB (default 512)\n"
	"\t-n Number of sub buffers (default 4)\n"
	"\t-l Run in network listen mode (blktrace server)\n"
	"\t-h Run in network client mode, connecting to the given host\n"
	"\t-p Network port to use (default 8462)\n"
	"\t-s Make the network client NOT use sendfile() to transfer data\n"
	"\t-I Add devices found in <devs file>\n"
	"\t-v Print program version info\n"
	"\t-V Print program version info\n\n";
9db17354 474
3fe0b570
AB
/*
 * Reset both the requested and the returned event masks of a poll entry;
 * the descriptor itself is left alone.
 */
static void clear_events(struct pollfd *pfd)
{
	pfd->revents = 0;
	pfd->events = 0;
}
21f55651 480
3fe0b570
AB
481static inline int net_client_use_sendfile(void)
482{
483 return net_mode == Net_client && net_use_sendfile;
484}
21f55651 485
3fe0b570
AB
486static inline int net_client_use_send(void)
487{
488 return net_mode == Net_client && !net_use_sendfile;
489}
b9d4294e 490
3fe0b570
AB
491static inline int use_tracer_devpaths(void)
492{
493 return piped_output || net_client_use_send();
494}
b9d4294e 495
3fe0b570
AB
/*
 * Returns 1 if both IPv4 addresses are identical, 0 otherwise.
 */
static inline int in_addr_eq(struct in_addr a, struct in_addr b)
{
	return (a.s_addr ^ b.s_addr) == 0;
}
007c233c 500
3fe0b570
AB
501static inline void pdc_dr_update(struct devpath *dpp, int cpu, int data_read)
502{
503 dpp->stats[cpu].data_read += data_read;
504}
0cc7d25e 505
3fe0b570
AB
506static inline void pdc_nev_update(struct devpath *dpp, int cpu, int nevents)
507{
508 dpp->stats[cpu].nevents += nevents;
509}
9db17354 510
3fe0b570
AB
511static void show_usage(char *prog)
512{
bc14c53f 513 fprintf(stderr, "Usage: %s %s", prog, usage_str);
3fe0b570 514}
9db17354 515
df81fdb5
AB
/*
 * Create a timespec 'delta_msec' milliseconds into the future.
 *
 * *tsp receives the current gettimeofday() time plus the delta,
 * normalized so that 0 <= tv_nsec < 1000000000 (the range
 * pthread_cond_timedwait() accepts).
 */
static inline void make_timespec(struct timespec *tsp, long delta_msec)
{
	struct timeval now;

	gettimeofday(&now, NULL);

	/*
	 * Split the delta into whole seconds and a sub-second remainder so
	 * the nanosecond arithmetic cannot overflow a 32-bit long.
	 */
	tsp->tv_sec = now.tv_sec + delta_msec / 1000;
	tsp->tv_nsec = 1000L * now.tv_usec + (delta_msec % 1000) * 1000000L;

	if (tsp->tv_nsec >= 1000000000L) {
		/* exactly one second's worth of nanoseconds must carry too */
		tsp->tv_sec += tsp->tv_nsec / 1000000000L;
		tsp->tv_nsec %= 1000000000L;
	} else if (tsp->tv_nsec < 0) {
		/* negative delta: borrow one second */
		tsp->tv_sec -= 1;
		tsp->tv_nsec += 1000000000L;
	}
}
535
/*
 * Condition wait with a 50 millisecond timer, to ensure the wait ends.
 * The result of the timed wait is not examined.
 */
static void t_pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex)
{
	struct timespec deadline;

	make_timespec(&deadline, 50);
	pthread_cond_timedwait(cond, mutex, &deadline);
}
546
547static void unblock_tracers(void)
548{
549 pthread_mutex_lock(&mt_mutex);
550 tracers_run = 1;
551 pthread_cond_broadcast(&mt_cond);
552 pthread_mutex_unlock(&mt_mutex);
553}
554
555static void tracer_wait_unblock(struct tracer *tp)
556{
557 pthread_mutex_lock(&mt_mutex);
558 while (!tp->is_done && !tracers_run)
559 pthread_cond_wait(&mt_cond, &mt_mutex);
560 pthread_mutex_unlock(&mt_mutex);
561}
562
563static void tracer_signal_ready(struct tracer *tp,
564 enum thread_status th_status,
565 int status)
566{
567 pthread_mutex_lock(&mt_mutex);
568 tp->status = status;
569
570 if (th_status == Th_running)
571 nthreads_running++;
572 else if (th_status == Th_error)
573 nthreads_error++;
574 else
575 nthreads_leaving++;
576
577 pthread_cond_signal(&mt_cond);
578 pthread_mutex_unlock(&mt_mutex);
579}
580
581static void wait_tracers_ready(int ncpus_started)
582{
583 pthread_mutex_lock(&mt_mutex);
584 while ((nthreads_running + nthreads_error) < ncpus_started)
585 t_pthread_cond_wait(&mt_cond, &mt_mutex);
586 pthread_mutex_unlock(&mt_mutex);
587}
588
589static void wait_tracers_leaving(void)
590{
591 pthread_mutex_lock(&mt_mutex);
592 while (nthreads_leaving < nthreads_running)
593 t_pthread_cond_wait(&mt_cond, &mt_mutex);
594 pthread_mutex_unlock(&mt_mutex);
595}
596
3fe0b570
AB
597static void init_mmap_info(struct mmap_info *mip)
598{
599 mip->buf_size = buf_size;
600 mip->buf_nr = buf_nr;
601 mip->pagesize = pagesize;
602}
b7106311 603
3fe0b570
AB
/*
 * Shut down both directions of the socket in *fd, close it, and store -1
 * back into *fd.
 */
static void net_close_connection(int *fd)
{
	const int sock = *fd;

	shutdown(sock, SHUT_RDWR);
	close(sock);
	*fd = -1;
}
ff11d54c 610
3fe0b570
AB
611static void dpp_free(struct devpath *dpp)
612{
613 if (dpp->stats)
614 free(dpp->stats);
615 if (dpp->ios)
616 free(dpp->ios);
617 if (dpp->path)
618 free(dpp->path);
619 if (dpp->buts_name)
620 free(dpp->buts_name);
621 free(dpp);
622}
d0ca268b 623
3fe0b570
AB
624static int lock_on_cpu(int cpu)
625{
0a915aab
NZ
626 cpu_set_t * cpu_mask;
627 size_t size;
d045a704
JK
628
629 cpu_mask = CPU_ALLOC(max_cpus);
630 size = CPU_ALLOC_SIZE(max_cpus);
0a915aab
NZ
631
632 CPU_ZERO_S(size, cpu_mask);
633 CPU_SET_S(cpu, size, cpu_mask);
634 if (sched_setaffinity(0, size, cpu_mask) < 0) {
635 CPU_FREE(cpu_mask);
3fe0b570 636 return errno;
0a915aab 637 }
d0ca268b 638
0a915aab 639 CPU_FREE(cpu_mask);
3fe0b570
AB
640 return 0;
641}
e7c9f3ff 642
3fe0b570
AB
/*
 * Raise the soft limit of 'resource' by 'increase'; when that reaches or
 * passes the hard limit, the hard limit is raised past it as well.
 *
 * Returns 1 if the new limits were installed. Otherwise returns 0 with
 * errno restored to the value it had on entry.
 */
static int increase_limit(int resource, rlim_t increase)
{
	const int entry_errno = errno;
	struct rlimit lim;

	if (getrlimit(resource, &lim) == 0) {
		lim.rlim_cur += increase;
		if (lim.rlim_max <= lim.rlim_cur)
			lim.rlim_max = lim.rlim_cur + increase;

		if (setrlimit(resource, &lim) == 0)
			return 1;
	}

	errno = entry_errno;
	return 0;
}
e0a1988b 660
3fe0b570
AB
/*
 * Called after a descriptor-creating call failed. If errno says we ran
 * out of descriptors (ENFILE/EMFILE), try to raise RLIMIT_NOFILE by 16.
 *
 * Returns non-zero when the caller should retry, 0 otherwise.
 */
static int handle_open_failure(void)
{
	if (errno != ENFILE && errno != EMFILE)
		return 0;

	return increase_limit(RLIMIT_NOFILE, 16);
}
99c1f5ab 667
3fe0b570
AB
/*
 * Called after a memory mapping/locking call failed for 'length' bytes.
 * ENFILE is handed to handle_open_failure(); ENOMEM tries to raise
 * RLIMIT_MEMLOCK by twice the length.
 *
 * Returns non-zero when the caller should retry, 0 otherwise.
 */
static int handle_mem_failure(size_t length)
{
	switch (errno) {
	case ENFILE:
		return handle_open_failure();
	case ENOMEM:
		return increase_limit(RLIMIT_MEMLOCK, 2 * length);
	default:
		return 0;
	}
}
99c1f5ab 676
3fe0b570
AB
/*
 * fopen() wrapper that retries for as long as handle_open_failure()
 * reports it was able to do something about the failure.
 */
static FILE *my_fopen(const char *path, const char *mode)
{
	for (;;) {
		FILE *stream = fopen(path, mode);

		if (stream != NULL || !handle_open_failure())
			return stream;
	}
}
8e86c98a 687
3fe0b570
AB
/*
 * open() wrapper that retries for as long as handle_open_failure()
 * reports it was able to do something about the failure.
 */
static int my_open(const char *path, int flags)
{
	for (;;) {
		int desc = open(path, flags);

		if (desc >= 0 || !handle_open_failure())
			return desc;
	}
}
ff11d54c 698
3fe0b570
AB
/*
 * socket() wrapper that retries for as long as handle_open_failure()
 * reports it was able to do something about the failure.
 */
static int my_socket(int domain, int type, int protocol)
{
	for (;;) {
		int sock = socket(domain, type, protocol);

		if (sock >= 0 || !handle_open_failure())
			return sock;
	}
}
709
d5302b03
AB
/*
 * accept() wrapper that retries for as long as handle_open_failure()
 * reports it was able to do something about the failure.
 */
static int my_accept(int sockfd, struct sockaddr *addr, socklen_t *addrlen)
{
	for (;;) {
		int conn = accept(sockfd, addr, addrlen);

		if (conn >= 0 || !handle_open_failure())
			return conn;
	}
}
720
3fe0b570
AB
/*
 * mmap() wrapper that retries for as long as handle_mem_failure()
 * reports it was able to do something about the failure. Returns the
 * mapping, or MAP_FAILED.
 */
static void *my_mmap(void *addr, size_t length, int prot, int flags, int fd,
		     off_t offset)
{
	for (;;) {
		void *map = mmap(addr, length, prot, flags, fd, offset);

		if (map != MAP_FAILED || !handle_mem_failure(length))
			return map;
	}
}
732
ae7c049d
TM
733static int my_mlock(struct tracer *tp,
734 const void *addr, size_t len)
3fe0b570 735{
ae7c049d 736 int ret, retry = 0;
3fe0b570
AB
737
738 do {
739 ret = mlock(addr, len);
ae7c049d
TM
740 if ((retry >= 10) && tp && tp->is_done)
741 break;
742 retry++;
3fe0b570
AB
743 } while (ret < 0 && handle_mem_failure(len));
744
745 return ret;
746}
747
ae7c049d
TM
748static int setup_mmap(int fd, unsigned int maxlen,
749 struct mmap_info *mip,
750 struct tracer *tp)
055cc3e5
AB
751{
752 if (mip->fs_off + maxlen > mip->fs_buf_len) {
753 unsigned long nr = max(16, mip->buf_nr);
754
755 if (mip->fs_buf) {
756 munlock(mip->fs_buf, mip->fs_buf_len);
757 munmap(mip->fs_buf, mip->fs_buf_len);
758 mip->fs_buf = NULL;
759 }
760
761 mip->fs_off = mip->fs_size & (mip->pagesize - 1);
762 mip->fs_buf_len = (nr * mip->buf_size) - mip->fs_off;
763 mip->fs_max_size += mip->fs_buf_len;
764
765 if (ftruncate(fd, mip->fs_max_size) < 0) {
766 perror("setup_mmap: ftruncate");
767 return 1;
768 }
769
770 mip->fs_buf = my_mmap(NULL, mip->fs_buf_len, PROT_WRITE,
771 MAP_SHARED, fd,
772 mip->fs_size - mip->fs_off);
773 if (mip->fs_buf == MAP_FAILED) {
774 perror("setup_mmap: mmap");
775 return 1;
776 }
ae7c049d
TM
777 if (my_mlock(tp, mip->fs_buf, mip->fs_buf_len) < 0) {
778 perror("setup_mlock: mlock");
779 return 1;
780 }
055cc3e5
AB
781 }
782
783 return 0;
784}
785
3fe0b570
AB
786static int __stop_trace(int fd)
787{
788 /*
789 * Should be stopped, don't complain if it isn't
790 */
791 ioctl(fd, BLKTRACESTOP);
792 return ioctl(fd, BLKTRACETEARDOWN);
793}
794
795static int write_data(char *buf, int len)
796{
797 int ret;
798
799rewrite:
800 ret = fwrite(buf, len, 1, pfp);
801 if (ferror(pfp) || ret != 1) {
802 if (errno == EINTR) {
803 clearerr(pfp);
804 goto rewrite;
805 }
806
807 if (!piped_output || (errno != EPIPE && errno != EBADF)) {
808 fprintf(stderr, "write(%d) failed: %d/%s\n",
809 len, errno, strerror(errno));
69dd57c2 810 }
3fe0b570 811 goto err;
69dd57c2
AB
812 }
813
3fe0b570 814 fflush(pfp);
69dd57c2 815 return 0;
3fe0b570
AB
816
817err:
818 clearerr(pfp);
819 return 1;
69dd57c2
AB
820}
821
822/*
3fe0b570 823 * Returns the number of bytes read (successfully)
69dd57c2 824 */
3fe0b570 825static int __net_recv_data(int fd, void *buf, unsigned int len)
69dd57c2 826{
3fe0b570
AB
827 unsigned int bytes_left = len;
828
829 while (bytes_left && !done) {
830 int ret = recv(fd, buf, bytes_left, MSG_WAITALL);
831
832 if (ret == 0)
833 break;
834 else if (ret < 0) {
5d65b5e6
AB
835 if (errno == EAGAIN) {
836 usleep(50);
837 continue;
838 }
839 perror("server: net_recv_data: recv failed");
055cc3e5 840 break;
3fe0b570
AB
841 } else {
842 buf += ret;
843 bytes_left -= ret;
844 }
845 }
69dd57c2 846
3fe0b570 847 return len - bytes_left;
69dd57c2
AB
848}
849
/*
 * Thin front end for __net_recv_data(): receive up to 'len' bytes from
 * 'fd' into 'buf' and return the number of bytes read.
 */
static int net_recv_data(int fd, void *buf, unsigned int len)
{
	int nread = __net_recv_data(fd, buf, len);

	return nread;
}
7035d92d 854
3fe0b570
AB
/*
 * Send 'buf_len' bytes from 'buf' on socket 'fd', looping over partial
 * sends. A send() failure is reported with perror() and ends the loop.
 *
 * Returns number of bytes written
 */
static int net_send_data(int fd, void *buf, unsigned int buf_len)
{
	const char *src = buf;
	unsigned int remaining;

	for (remaining = buf_len; remaining; ) {
		int sent = send(fd, src, remaining, 0);

		if (sent < 0) {
			perror("send");
			break;
		}

		src += sent;
		remaining -= sent;
	}

	return buf_len - remaining;
}
876
3fe0b570 877static int net_send_header(int fd, int cpu, char *buts_name, int len)
eb3c8108 878{
3fe0b570 879 struct blktrace_net_hdr hdr;
eb3c8108 880
3fe0b570 881 memset(&hdr, 0, sizeof(hdr));
eb3c8108 882
3fe0b570 883 hdr.magic = BLK_IO_TRACE_MAGIC;
d324757e 884 memset(hdr.buts_name, 0, sizeof(hdr.buts_name));
3fe0b570 885 strncpy(hdr.buts_name, buts_name, sizeof(hdr.buts_name));
d324757e 886 hdr.buts_name[sizeof(hdr.buts_name) - 1] = '\0';
3fe0b570 887 hdr.cpu = cpu;
d045a704 888 hdr.max_cpus = max_cpus;
3fe0b570
AB
889 hdr.len = len;
890 hdr.cl_id = getpid();
891 hdr.buf_size = buf_size;
892 hdr.buf_nr = buf_nr;
893 hdr.page_size = pagesize;
eb3c8108 894
3fe0b570
AB
895 return net_send_data(fd, &hdr, sizeof(hdr)) != sizeof(hdr);
896}
eb3c8108 897
3fe0b570
AB
898static void net_send_open_close(int fd, int cpu, char *buts_name, int len)
899{
900 struct blktrace_net_hdr ret_hdr;
eb3c8108 901
3fe0b570
AB
902 net_send_header(fd, cpu, buts_name, len);
903 net_recv_data(fd, &ret_hdr, sizeof(ret_hdr));
904}
eb3c8108 905
3fe0b570
AB
/*
 * Announce an "open" (header len == 0) for 'cpu' / 'buts_name' on 'fd'.
 */
static void net_send_open(int fd, int cpu, char *buts_name)
{
	const int open_len = 0;

	net_send_open_close(fd, cpu, buts_name, open_len);
}
910
3fe0b570 911static void net_send_close(int fd, char *buts_name, int drops)
d0ca268b 912{
3fe0b570
AB
913 /*
914 * Overload CPU w/ number of drops
915 *
916 * XXX: Need to clear/set done around call - done=1 (which
917 * is true here) stops reads from happening... :-(
918 */
919 done = 0;
920 net_send_open_close(fd, drops, buts_name, 1);
921 done = 1;
922}
d0ca268b 923
3fe0b570
AB
/*
 * Send a header with cpu 0 and len 2 for 'buts_name' on 'fd'; judging by
 * the name, this acknowledges an open/close request. The send result is
 * not checked.
 */
static void ack_open_close(int fd, char *buts_name)
{
	const int ack_len = 2;

	net_send_header(fd, 0, buts_name, ack_len);
}
d0ca268b 928
3fe0b570
AB
929static void net_send_drops(int fd)
930{
931 struct list_head *p;
ed71a31e 932
3fe0b570
AB
933 __list_for_each(p, &devpaths) {
934 struct devpath *dpp = list_entry(p, struct devpath, head);
935
936 net_send_close(fd, dpp->buts_name, dpp->drops);
d0ca268b 937 }
3fe0b570 938}
d0ca268b 939
3fe0b570
AB
940/*
941 * Returns:
055cc3e5
AB
942 * 0: "EOF"
943 * 1: OK
944 * -1: Error
3fe0b570
AB
945 */
946static int net_get_header(struct cl_conn *nc, struct blktrace_net_hdr *bnh)
947{
948 int bytes_read;
949 int fl = fcntl(nc->fd, F_GETFL);
950
951 fcntl(nc->fd, F_SETFL, fl | O_NONBLOCK);
952 bytes_read = __net_recv_data(nc->fd, bnh, sizeof(*bnh));
953 fcntl(nc->fd, F_SETFL, fl & ~O_NONBLOCK);
954
955 if (bytes_read == sizeof(*bnh))
956 return 1;
957 else if (bytes_read == 0)
958 return 0;
055cc3e5
AB
959 else
960 return -1;
d0ca268b
JA
961}
962
e58f3937 963static int net_setup_addr(void)
d0ca268b 964{
e58f3937 965 struct sockaddr_in *addr = &hostname_addr;
cf9208ea 966
e58f3937
AB
967 memset(addr, 0, sizeof(*addr));
968 addr->sin_family = AF_INET;
969 addr->sin_port = htons(net_port);
3fe0b570 970
e58f3937
AB
971 if (inet_aton(hostname, &addr->sin_addr) != 1) {
972 struct hostent *hent;
973retry:
974 hent = gethostbyname(hostname);
3fe0b570 975 if (!hent) {
e58f3937
AB
976 if (h_errno == TRY_AGAIN) {
977 usleep(100);
978 goto retry;
979 } else if (h_errno == NO_RECOVERY) {
980 fprintf(stderr, "gethostbyname(%s)"
981 "non-recoverable error encountered\n",
982 hostname);
983 } else {
984 /*
985 * HOST_NOT_FOUND, NO_ADDRESS or NO_DATA
986 */
987 fprintf(stderr, "Host %s not found\n",
988 hostname);
989 }
3fe0b570
AB
990 return 1;
991 }
992
e58f3937 993 memcpy(&addr->sin_addr, hent->h_addr, 4);
d324757e
ES
994 memset(hostname, 0, sizeof(hostname));
995 strncpy(hostname, hent->h_name, sizeof(hostname));
996 hostname[sizeof(hostname) - 1] = '\0';
3fe0b570 997 }
7035d92d 998
e58f3937
AB
999 return 0;
1000}
1001
1002static int net_setup_client(void)
1003{
1004 int fd;
1005 struct sockaddr_in *addr = &hostname_addr;
1006
3fe0b570
AB
1007 fd = my_socket(AF_INET, SOCK_STREAM, 0);
1008 if (fd < 0) {
1009 perror("client: socket");
1010 return -1;
1011 }
cf9208ea 1012
e58f3937 1013 if (connect(fd, (struct sockaddr *)addr, sizeof(*addr)) < 0) {
3fe0b570
AB
1014 if (errno == ECONNREFUSED)
1015 fprintf(stderr,
1016 "\nclient: Connection to %s refused, "
1017 "perhaps the server is not started?\n\n",
1018 hostname);
1019 else
1020 perror("client: connect");
055cc3e5 1021
3fe0b570
AB
1022 close(fd);
1023 return -1;
707b0914 1024 }
3fe0b570
AB
1025
1026 return fd;
d0ca268b
JA
1027}
1028
3fe0b570 1029static int open_client_connections(void)
e7c9f3ff 1030{
3fe0b570 1031 int cpu;
d045a704 1032 size_t alloc_size = CPU_ALLOC_SIZE(max_cpus);
e7c9f3ff 1033
3fe0b570 1034 cl_fds = calloc(ncpus, sizeof(*cl_fds));
d045a704
JK
1035 for (cpu = 0; cpu < max_cpus; cpu++) {
1036 if (!CPU_ISSET_S(cpu, alloc_size, online_cpus))
1037 continue;
3fe0b570
AB
1038 cl_fds[cpu] = net_setup_client();
1039 if (cl_fds[cpu] < 0)
1040 goto err;
eb3c8108 1041 }
3fe0b570
AB
1042 return 0;
1043
1044err:
1045 while (cpu > 0)
1046 close(cl_fds[cpu--]);
1047 free(cl_fds);
1048 return 1;
e7c9f3ff
NS
1049}
1050
3fe0b570 1051static void close_client_connections(void)
eb3c8108 1052{
3fe0b570
AB
1053 if (cl_fds) {
1054 int cpu, *fdp;
d045a704 1055 size_t alloc_size = CPU_ALLOC_SIZE(max_cpus);
eb3c8108 1056
d045a704
JK
1057 for (cpu = 0, fdp = cl_fds; cpu < max_cpus; cpu++, fdp++) {
1058 if (!CPU_ISSET_S(cpu, alloc_size, online_cpus))
1059 continue;
3fe0b570
AB
1060 if (*fdp >= 0) {
1061 net_send_drops(*fdp);
1062 net_close_connection(fdp);
1063 }
7934e668 1064 }
3fe0b570 1065 free(cl_fds);
ff11d54c 1066 }
eb3c8108
JA
1067}
1068
3fe0b570 1069static void setup_buts(void)
d0ca268b 1070{
3fe0b570 1071 struct list_head *p;
bbabf03a 1072
3fe0b570
AB
1073 __list_for_each(p, &devpaths) {
1074 struct blk_user_trace_setup buts;
1075 struct devpath *dpp = list_entry(p, struct devpath, head);
ae9f71b3 1076
3fe0b570
AB
1077 memset(&buts, 0, sizeof(buts));
1078 buts.buf_size = buf_size;
1079 buts.buf_nr = buf_nr;
1080 buts.act_mask = act_mask;
1081
055cc3e5 1082 if (ioctl(dpp->fd, BLKTRACESETUP, &buts) >= 0) {
d045a704 1083 dpp->ncpus = max_cpus;
3b552a2d
AB
1084 dpp->buts_name = strdup(buts.name);
1085 if (dpp->stats)
1086 free(dpp->stats);
1087 dpp->stats = calloc(dpp->ncpus, sizeof(*dpp->stats));
1088 memset(dpp->stats, 0, dpp->ncpus * sizeof(*dpp->stats));
055cc3e5
AB
1089 } else
1090 fprintf(stderr, "BLKTRACESETUP(2) %s failed: %d/%s\n",
1091 dpp->path, errno, strerror(errno));
3b552a2d
AB
1092 }
1093}
1094
1095static void start_buts(void)
1096{
1097 struct list_head *p;
1098
1099 __list_for_each(p, &devpaths) {
1100 struct devpath *dpp = list_entry(p, struct devpath, head);
1101
1102 if (ioctl(dpp->fd, BLKTRACESTART) < 0) {
3fe0b570
AB
1103 fprintf(stderr, "BLKTRACESTART %s failed: %d/%s\n",
1104 dpp->path, errno, strerror(errno));
bbabf03a 1105 }
3fe0b570 1106 }
8a43bac5
JA
1107}
1108
3fe0b570 1109static int get_drops(struct devpath *dpp)
8e86c98a 1110{
3fe0b570
AB
1111 int fd, drops = 0;
1112 char fn[MAXPATHLEN + 64], tmp[256];
8e86c98a 1113
3fe0b570
AB
1114 snprintf(fn, sizeof(fn), "%s/block/%s/dropped", debugfs_path,
1115 dpp->buts_name);
8e86c98a 1116
3fe0b570
AB
1117 fd = my_open(fn, O_RDONLY);
1118 if (fd < 0) {
1119 /*
1120 * This may be ok: the kernel may not support
1121 * dropped counts.
1122 */
1123 if (errno != ENOENT)
1124 fprintf(stderr, "Could not open %s: %d/%s\n",
1125 fn, errno, strerror(errno));
1126 return 0;
1127 } else if (read(fd, tmp, sizeof(tmp)) < 0) {
1128 fprintf(stderr, "Could not read %s: %d/%s\n",
1129 fn, errno, strerror(errno));
1130 } else
1131 drops = atoi(tmp);
1132 close(fd);
8e86c98a 1133
3fe0b570 1134 return drops;
8e86c98a
JA
1135}
1136
3fe0b570 1137static void get_all_drops(void)
a3e4d330 1138{
3fe0b570 1139 struct list_head *p;
21f55651 1140
3fe0b570
AB
1141 __list_for_each(p, &devpaths) {
1142 struct devpath *dpp = list_entry(p, struct devpath, head);
055cc3e5 1143
3fe0b570 1144 dpp->drops = get_drops(dpp);
21f55651 1145 }
9db17354 1146}
eb3c8108 1147
3fe0b570 1148static inline struct trace_buf *alloc_trace_buf(int cpu, int bufsize)
9db17354 1149{
3fe0b570 1150 struct trace_buf *tbp;
21f55651 1151
3fe0b570
AB
1152 tbp = malloc(sizeof(*tbp) + bufsize);
1153 INIT_LIST_HEAD(&tbp->head);
1154 tbp->len = 0;
1155 tbp->buf = (void *)(tbp + 1);
1156 tbp->cpu = cpu;
1157 tbp->dpp = NULL; /* Will be set when tbp is added */
21f55651 1158
3fe0b570 1159 return tbp;
a3e4d330
JA
1160}
1161
3fe0b570 1162static void free_tracer_heads(struct devpath *dpp)
b7106311 1163{
3fe0b570
AB
1164 int cpu;
1165 struct tracer_devpath_head *hd;
b7106311 1166
d045a704 1167 for (cpu = 0, hd = dpp->heads; cpu < max_cpus; cpu++, hd++) {
3fe0b570
AB
1168 if (hd->prev)
1169 free(hd->prev);
055cc3e5 1170
3fe0b570
AB
1171 pthread_mutex_destroy(&hd->mutex);
1172 }
1173 free(dpp->heads);
1174}
b7106311 1175
3fe0b570
AB
1176static int setup_tracer_devpaths(void)
1177{
1178 struct list_head *p;
b7106311 1179
3fe0b570
AB
1180 if (net_client_use_send())
1181 if (open_client_connections())
1182 return 1;
b7106311 1183
3fe0b570
AB
1184 __list_for_each(p, &devpaths) {
1185 int cpu;
1186 struct tracer_devpath_head *hd;
1187 struct devpath *dpp = list_entry(p, struct devpath, head);
b7106311 1188
d045a704
JK
1189 dpp->heads = calloc(max_cpus, sizeof(struct tracer_devpath_head));
1190 for (cpu = 0, hd = dpp->heads; cpu < max_cpus; cpu++, hd++) {
3fe0b570
AB
1191 INIT_LIST_HEAD(&hd->head);
1192 pthread_mutex_init(&hd->mutex, NULL);
1193 hd->prev = NULL;
1194 }
b7106311
JA
1195 }
1196
3fe0b570 1197 return 0;
b7106311
JA
1198}
1199
3fe0b570
AB
1200static inline void add_trace_buf(struct devpath *dpp, int cpu,
1201 struct trace_buf **tbpp)
18eed2a7 1202{
3fe0b570
AB
1203 struct trace_buf *tbp = *tbpp;
1204 struct tracer_devpath_head *hd = &dpp->heads[cpu];
18eed2a7 1205
3fe0b570 1206 tbp->dpp = dpp;
2f064793 1207
3fe0b570
AB
1208 pthread_mutex_lock(&hd->mutex);
1209 list_add_tail(&tbp->head, &hd->head);
1210 pthread_mutex_unlock(&hd->mutex);
18eed2a7 1211
3fe0b570 1212 *tbpp = alloc_trace_buf(cpu, buf_size);
18eed2a7
JA
1213}
1214
3fe0b570 1215static inline void incr_entries(int entries_handled)
a3e4d330 1216{
3fe0b570
AB
1217 pthread_mutex_lock(&dp_mutex);
1218 if (dp_entries == 0)
1219 pthread_cond_signal(&dp_cond);
1220 dp_entries += entries_handled;
1221 pthread_mutex_unlock(&dp_mutex);
a3e4d330
JA
1222}
1223
055cc3e5
AB
1224static void decr_entries(int handled)
1225{
1226 pthread_mutex_lock(&dp_mutex);
1227 dp_entries -= handled;
1228 pthread_mutex_unlock(&dp_mutex);
1229}
1230
1231static int wait_empty_entries(void)
1232{
1233 pthread_mutex_lock(&dp_mutex);
1234 while (!done && dp_entries == 0)
1235 t_pthread_cond_wait(&dp_cond, &dp_mutex);
1236 pthread_mutex_unlock(&dp_mutex);
1237
1238 return !done;
1239}
1240
3fe0b570 1241static int add_devpath(char *path)
8e86c98a 1242{
3fe0b570
AB
1243 int fd;
1244 struct devpath *dpp;
4b747a40 1245 struct list_head *p;
3fe0b570 1246
4b747a40
ES
1247 /*
1248 * Verify device is not duplicated
1249 */
1250 __list_for_each(p, &devpaths) {
1251 struct devpath *tmp = list_entry(p, struct devpath, head);
1252 if (!strcmp(tmp->path, path))
1253 return 0;
1254 }
8e86c98a 1255 /*
3fe0b570 1256 * Verify device is valid before going too far
8e86c98a 1257 */
3fe0b570
AB
1258 fd = my_open(path, O_RDONLY | O_NONBLOCK);
1259 if (fd < 0) {
1260 fprintf(stderr, "Invalid path %s specified: %d/%s\n",
1261 path, errno, strerror(errno));
1262 return 1;
1263 }
8e86c98a 1264
3fe0b570
AB
1265 dpp = malloc(sizeof(*dpp));
1266 memset(dpp, 0, sizeof(*dpp));
1267 dpp->path = strdup(path);
1268 dpp->fd = fd;
70598a36 1269 ndevs++;
3fe0b570 1270 list_add_tail(&dpp->head, &devpaths);
8e86c98a 1271
3fe0b570 1272 return 0;
8e86c98a
JA
1273}
1274
3fe0b570 1275static void rel_devpaths(void)
a3e4d330 1276{
3fe0b570 1277 struct list_head *p, *q;
a3e4d330 1278
3fe0b570
AB
1279 list_for_each_safe(p, q, &devpaths) {
1280 struct devpath *dpp = list_entry(p, struct devpath, head);
a3e4d330 1281
3fe0b570
AB
1282 list_del(&dpp->head);
1283 __stop_trace(dpp->fd);
1284 close(dpp->fd);
a3e4d330 1285
3fe0b570
AB
1286 if (dpp->heads)
1287 free_tracer_heads(dpp);
a3e4d330 1288
3fe0b570
AB
1289 dpp_free(dpp);
1290 ndevs--;
b7106311 1291 }
8e86c98a 1292}
b7106311 1293
3fe0b570 1294static int flush_subbuf_net(struct trace_buf *tbp)
8e86c98a 1295{
3fe0b570
AB
1296 int fd = cl_fds[tbp->cpu];
1297 struct devpath *dpp = tbp->dpp;
b7106311 1298
3fe0b570
AB
1299 if (net_send_header(fd, tbp->cpu, dpp->buts_name, tbp->len))
1300 return 1;
055cc3e5 1301 else if (net_send_data(fd, tbp->buf, tbp->len) != tbp->len)
3fe0b570 1302 return 1;
a3e4d330 1303
8e86c98a 1304 return 0;
a3e4d330
JA
1305}
1306
3fe0b570
AB
1307static int
1308handle_list_net(__attribute__((__unused__))struct tracer_devpath_head *hd,
1309 struct list_head *list)
8e86c98a 1310{
3fe0b570
AB
1311 struct trace_buf *tbp;
1312 struct list_head *p, *q;
1313 int entries_handled = 0;
8e86c98a 1314
3fe0b570
AB
1315 list_for_each_safe(p, q, list) {
1316 tbp = list_entry(p, struct trace_buf, head);
8e86c98a 1317
3fe0b570
AB
1318 list_del(&tbp->head);
1319 entries_handled++;
6a752c90 1320
3fe0b570
AB
1321 if (cl_fds[tbp->cpu] >= 0) {
1322 if (flush_subbuf_net(tbp)) {
1323 close(cl_fds[tbp->cpu]);
1324 cl_fds[tbp->cpu] = -1;
1325 }
1326 }
7ab2f837 1327
3fe0b570 1328 free(tbp);
7934e668
JA
1329 }
1330
3fe0b570 1331 return entries_handled;
6a752c90
JA
1332}
1333
055cc3e5
AB
1334/*
1335 * Tack 'tbp's buf onto the tail of 'prev's buf
1336 */
1337static struct trace_buf *tb_combine(struct trace_buf *prev,
1338 struct trace_buf *tbp)
1339{
1340 unsigned long tot_len;
1341
1342 tot_len = prev->len + tbp->len;
1343 if (tot_len > buf_size) {
1344 /*
1345 * tbp->head isn't connected (it was 'prev'
1346 * so it had been taken off of the list
1347 * before). Therefore, we can realloc
1348 * the whole structures, as the other fields
1349 * are "static".
1350 */
d8365957 1351 prev = realloc(prev, sizeof(*prev) + tot_len);
055cc3e5
AB
1352 prev->buf = (void *)(prev + 1);
1353 }
1354
1355 memcpy(prev->buf + prev->len, tbp->buf, tbp->len);
1356 prev->len = tot_len;
1357
1358 free(tbp);
1359 return prev;
1360}
1361
3fe0b570
AB
1362static int handle_list_file(struct tracer_devpath_head *hd,
1363 struct list_head *list)
f6fead25 1364{
3fe0b570
AB
1365 int off, t_len, nevents;
1366 struct blk_io_trace *t;
1367 struct list_head *p, *q;
1368 int entries_handled = 0;
1369 struct trace_buf *tbp, *prev;
11629347 1370
3fe0b570
AB
1371 prev = hd->prev;
1372 list_for_each_safe(p, q, list) {
1373 tbp = list_entry(p, struct trace_buf, head);
1374 list_del(&tbp->head);
1375 entries_handled++;
18eed2a7 1376
3fe0b570
AB
1377 /*
1378 * If there was some leftover before, tack this new
1379 * entry onto the tail of the previous one.
1380 */
055cc3e5
AB
1381 if (prev)
1382 tbp = tb_combine(prev, tbp);
ff11d54c 1383
3fe0b570
AB
1384 /*
1385 * See how many whole traces there are - send them
1386 * all out in one go.
1387 */
1388 off = 0;
1389 nevents = 0;
1390 while (off + (int)sizeof(*t) <= tbp->len) {
1391 t = (struct blk_io_trace *)(tbp->buf + off);
1392 t_len = sizeof(*t) + t->pdu_len;
1393 if (off + t_len > tbp->len)
1394 break;
ff11d54c 1395
3fe0b570
AB
1396 off += t_len;
1397 nevents++;
1398 }
1399 if (nevents)
1400 pdc_nev_update(tbp->dpp, tbp->cpu, nevents);
4aeec019 1401
3fe0b570
AB
1402 /*
1403 * Write any full set of traces, any remaining data is kept
1404 * for the next pass.
1405 */
1406 if (off) {
055cc3e5 1407 if (write_data(tbp->buf, off) || off == tbp->len) {
3fe0b570 1408 free(tbp);
055cc3e5
AB
1409 prev = NULL;
1410 }
3fe0b570
AB
1411 else {
1412 /*
1413 * Move valid data to beginning of buffer
1414 */
1415 tbp->len -= off;
1416 memmove(tbp->buf, tbp->buf + off, tbp->len);
1417 prev = tbp;
1418 }
1419 } else
1420 prev = tbp;
ff11d54c 1421 }
3fe0b570 1422 hd->prev = prev;
ff11d54c 1423
3fe0b570 1424 return entries_handled;
ff11d54c
TZ
1425}
1426
3fe0b570 1427static void __process_trace_bufs(void)
8a43bac5 1428{
3fe0b570
AB
1429 int cpu;
1430 struct list_head *p;
1431 struct list_head list;
1432 int handled = 0;
1433
1434 __list_for_each(p, &devpaths) {
1435 struct devpath *dpp = list_entry(p, struct devpath, head);
1436 struct tracer_devpath_head *hd = dpp->heads;
1437
d045a704 1438 for (cpu = 0; cpu < max_cpus; cpu++, hd++) {
3fe0b570
AB
1439 pthread_mutex_lock(&hd->mutex);
1440 if (list_empty(&hd->head)) {
1441 pthread_mutex_unlock(&hd->mutex);
1442 continue;
1443 }
8a43bac5 1444
3fe0b570
AB
1445 list_replace_init(&hd->head, &list);
1446 pthread_mutex_unlock(&hd->mutex);
6480258a 1447
3fe0b570
AB
1448 handled += handle_list(hd, &list);
1449 }
d0ca268b
JA
1450 }
1451
055cc3e5
AB
1452 if (handled)
1453 decr_entries(handled);
8a43bac5
JA
1454}
1455
/*
 * Keep sweeping the trace buffer lists for as long as
 * wait_empty_entries() returns non-zero.
 */
static void process_trace_bufs(void)
{
	for (;;) {
		if (!wait_empty_entries())
			break;
		__process_trace_bufs();
	}
}
3a9d6c13 1461
3fe0b570
AB
1462static void clean_trace_bufs(void)
1463{
3a9d6c13 1464 /*
3fe0b570
AB
1465 * No mutex needed here: we're only reading from the lists,
1466 * tracers are done
3a9d6c13 1467 */
3fe0b570
AB
1468 while (dp_entries)
1469 __process_trace_bufs();
1470}
4b5db44a 1471
3fe0b570
AB
/*
 * Report a failed read of 'ifn' by the thread for 'cpu' on stderr.
 * Nothing is printed when errno is EAGAIN.
 */
static inline void read_err(int cpu, char *ifn)
{
	if (errno == EAGAIN)
		return;

	fprintf(stderr, "Thread %d failed read of %s: %d/%s\n",
		cpu, ifn, errno, strerror(errno));
}
1478
3fe0b570 1479static int net_sendfile(struct io_info *iop)
d5396421 1480{
3fe0b570 1481 int ret;
d5396421 1482
3fe0b570
AB
1483 ret = sendfile(iop->ofd, iop->ifd, NULL, iop->ready);
1484 if (ret < 0) {
1485 perror("sendfile");
1486 return 1;
1487 } else if (ret < (int)iop->ready) {
1488 fprintf(stderr, "short sendfile send (%d of %d)\n",
1489 ret, iop->ready);
1490 return 1;
1491 }
91816d54 1492
9db17354 1493 return 0;
91816d54
JA
1494}
1495
3fe0b570 1496static inline int net_sendfile_data(struct tracer *tp, struct io_info *iop)
d0ca268b 1497{
3fe0b570 1498 struct devpath *dpp = iop->dpp;
d0ca268b 1499
3fe0b570
AB
1500 if (net_send_header(iop->ofd, tp->cpu, dpp->buts_name, iop->ready))
1501 return 1;
1502 return net_sendfile(iop);
1503}
d0ca268b 1504
d7a1f726
JK
1505static int fill_ofname(char *dst, int dstlen, char *subdir, char *buts_name,
1506 int cpu)
8e86c98a 1507{
3fe0b570 1508 int len;
e3bf54d8 1509 struct stat sb;
8e86c98a
JA
1510
1511 if (output_dir)
d7a1f726 1512 len = snprintf(dst, dstlen, "%s/", output_dir);
dd870ef6 1513 else
d7a1f726 1514 len = snprintf(dst, dstlen, "./");
8e86c98a 1515
d7a1f726
JK
1516 if (subdir)
1517 len += snprintf(dst + len, dstlen - len, "%s", subdir);
e0a1988b 1518
d7a1f726 1519 if (stat(dst, &sb) < 0) {
e3bf54d8 1520 if (errno != ENOENT) {
3fe0b570
AB
1521 fprintf(stderr,
1522 "Destination dir %s stat failed: %d/%s\n",
d7a1f726 1523 dst, errno, strerror(errno));
e3bf54d8
JA
1524 return 1;
1525 }
60886290
JM
1526 /*
1527 * There is no synchronization between multiple threads
1528 * trying to create the directory at once. It's harmless
1529 * to let them try, so just detect the problem and move on.
1530 */
d7a1f726 1531 if (mkdir(dst, 0755) < 0 && errno != EEXIST) {
3fe0b570
AB
1532 fprintf(stderr,
1533 "Destination dir %s can't be made: %d/%s\n",
d7a1f726 1534 dst, errno, strerror(errno));
e3bf54d8
JA
1535 return 1;
1536 }
1537 }
1538
8e86c98a 1539 if (output_name)
d7a1f726 1540 snprintf(dst + len, dstlen - len, "%s.blktrace.%d",
3fe0b570 1541 output_name, cpu);
8e86c98a 1542 else
d7a1f726
JK
1543 snprintf(dst + len, dstlen - len, "%s.blktrace.%d",
1544 buts_name, cpu);
e3bf54d8
JA
1545
1546 return 0;
8e86c98a
JA
1547}
1548
3fe0b570 1549static int set_vbuf(struct io_info *iop, int mode, size_t size)
0cc7d25e 1550{
3fe0b570
AB
1551 iop->obuf = malloc(size);
1552 if (setvbuf(iop->ofp, iop->obuf, mode, size) < 0) {
1553 fprintf(stderr, "setvbuf(%s, %d) failed: %d/%s\n",
1554 iop->dpp->path, (int)size, errno,
1555 strerror(errno));
1556 free(iop->obuf);
ddf22842
JA
1557 return 1;
1558 }
d5396421 1559
ddf22842
JA
1560 return 0;
1561}
007c233c 1562
3fe0b570 1563static int iop_open(struct io_info *iop, int cpu)
ddf22842 1564{
d7a1f726
JK
1565 char hostdir[MAXPATHLEN + 64];
1566
3fe0b570 1567 iop->ofd = -1;
d7a1f726
JK
1568 if (net_mode == Net_server) {
1569 struct cl_conn *nc = iop->nc;
1570 int len;
1571
1572 len = snprintf(hostdir, sizeof(hostdir), "%s-",
1573 nc->ch->hostname);
1574 len += strftime(hostdir + len, sizeof(hostdir) - len, "%F-%T/",
1575 gmtime(&iop->dpp->cl_connect_time));
1576 } else {
1577 hostdir[0] = 0;
1578 }
1579
1580 if (fill_ofname(iop->ofn, sizeof(iop->ofn), hostdir,
1581 iop->dpp->buts_name, cpu))
3fe0b570 1582 return 1;
0cc7d25e 1583
3fe0b570
AB
1584 iop->ofp = my_fopen(iop->ofn, "w+");
1585 if (iop->ofp == NULL) {
1586 fprintf(stderr, "Open output file %s failed: %d/%s\n",
1587 iop->ofn, errno, strerror(errno));
1588 return 1;
1589 }
055cc3e5 1590
3fe0b570
AB
1591 if (set_vbuf(iop, _IOLBF, FILE_VBUF_SIZE)) {
1592 fprintf(stderr, "set_vbuf for file %s failed: %d/%s\n",
1593 iop->ofn, errno, strerror(errno));
1594 fclose(iop->ofp);
1595 return 1;
d0ca268b
JA
1596 }
1597
3fe0b570 1598 iop->ofd = fileno(iop->ofp);
e7c9f3ff 1599 return 0;
d0ca268b
JA
1600}
1601
df81fdb5
AB
1602static void close_iop(struct io_info *iop)
1603{
1604 struct mmap_info *mip = &iop->mmap_info;
1605
1606 if (mip->fs_buf)
1607 munmap(mip->fs_buf, mip->fs_buf_len);
1608
1609 if (!piped_output) {
1610 if (ftruncate(fileno(iop->ofp), mip->fs_size) < 0) {
1611 fprintf(stderr,
1612 "Ignoring err: ftruncate(%s): %d/%s\n",
1613 iop->ofn, errno, strerror(errno));
1614 }
1615 }
1616
1617 if (iop->ofp)
1618 fclose(iop->ofp);
1619 if (iop->obuf)
1620 free(iop->obuf);
1621}
1622
1623static void close_ios(struct tracer *tp)
1624{
1625 while (tp->nios > 0) {
1626 struct io_info *iop = &tp->ios[--tp->nios];
1627
1628 iop->dpp->drops = get_drops(iop->dpp);
1629 if (iop->ifd >= 0)
1630 close(iop->ifd);
1631
1632 if (iop->ofp)
1633 close_iop(iop);
1634 else if (iop->ofd >= 0) {
1635 struct devpath *dpp = iop->dpp;
1636
1637 net_send_close(iop->ofd, dpp->buts_name, dpp->drops);
1638 net_close_connection(&iop->ofd);
1639 }
1640 }
1641
1642 free(tp->ios);
1643 free(tp->pfds);
1644}
1645
3fe0b570 1646static int open_ios(struct tracer *tp)
3aabcd89 1647{
3fe0b570
AB
1648 struct pollfd *pfd;
1649 struct io_info *iop;
1650 struct list_head *p;
1651
1652 tp->ios = calloc(ndevs, sizeof(struct io_info));
3fe0b570 1653 memset(tp->ios, 0, ndevs * sizeof(struct io_info));
055cc3e5
AB
1654
1655 tp->pfds = calloc(ndevs, sizeof(struct pollfd));
3fe0b570
AB
1656 memset(tp->pfds, 0, ndevs * sizeof(struct pollfd));
1657
1658 tp->nios = 0;
1659 iop = tp->ios;
1660 pfd = tp->pfds;
1661 __list_for_each(p, &devpaths) {
1662 struct devpath *dpp = list_entry(p, struct devpath, head);
1663
1664 iop->dpp = dpp;
1665 iop->ofd = -1;
1666 snprintf(iop->ifn, sizeof(iop->ifn), "%s/block/%s/trace%d",
1667 debugfs_path, dpp->buts_name, tp->cpu);
1668
1669 iop->ifd = my_open(iop->ifn, O_RDONLY | O_NONBLOCK);
1670 if (iop->ifd < 0) {
1671 fprintf(stderr, "Thread %d failed open %s: %d/%s\n",
1672 tp->cpu, iop->ifn, errno, strerror(errno));
1673 return 1;
1674 }
1675
1676 init_mmap_info(&iop->mmap_info);
1677
1678 pfd->fd = iop->ifd;
1679 pfd->events = POLLIN;
1680
1681 if (piped_output)
1682 ;
1683 else if (net_client_use_sendfile()) {
1684 iop->ofd = net_setup_client();
1685 if (iop->ofd < 0)
1686 goto err;
1687 net_send_open(iop->ofd, tp->cpu, dpp->buts_name);
1688 } else if (net_mode == Net_none) {
1689 if (iop_open(iop, tp->cpu))
1690 goto err;
1691 } else {
1692 /*
1693 * This ensures that the server knows about all
1694 * connections & devices before _any_ closes
1695 */
1696 net_send_open(cl_fds[tp->cpu], tp->cpu, dpp->buts_name);
1697 }
007c233c 1698
3fe0b570
AB
1699 pfd++;
1700 iop++;
1701 tp->nios++;
9db17354 1702 }
3aabcd89 1703
3fe0b570 1704 return 0;
72ca8801 1705
3fe0b570
AB
1706err:
1707 close(iop->ifd); /* tp->nios _not_ bumped */
df81fdb5 1708 close_ios(tp);
3fe0b570 1709 return 1;
e7c9f3ff
NS
1710}
1711
3fe0b570 1712static int handle_pfds_file(struct tracer *tp, int nevs, int force_read)
e7c9f3ff 1713{
3fe0b570
AB
1714 struct mmap_info *mip;
1715 int i, ret, nentries = 0;
1716 struct pollfd *pfd = tp->pfds;
1717 struct io_info *iop = tp->ios;
1718
1719 for (i = 0; nevs > 0 && i < ndevs; i++, pfd++, iop++) {
1720 if (pfd->revents & POLLIN || force_read) {
1721 mip = &iop->mmap_info;
1722
ae7c049d 1723 ret = setup_mmap(iop->ofd, buf_size, mip, tp);
3fe0b570
AB
1724 if (ret < 0) {
1725 pfd->events = 0;
1726 break;
1727 }
428683db 1728
3fe0b570
AB
1729 ret = read(iop->ifd, mip->fs_buf + mip->fs_off,
1730 buf_size);
1731 if (ret > 0) {
1732 pdc_dr_update(iop->dpp, tp->cpu, ret);
1733 mip->fs_size += ret;
1734 mip->fs_off += ret;
1735 nentries++;
1736 } else if (ret == 0) {
1737 /*
1738 * Short reads after we're done stop us
1739 * from trying reads.
1740 */
1741 if (tp->is_done)
1742 clear_events(pfd);
1743 } else {
1744 read_err(tp->cpu, iop->ifn);
1745 if (errno != EAGAIN || tp->is_done)
1746 clear_events(pfd);
1747 }
1748 nevs--;
e7c9f3ff 1749 }
e7c9f3ff 1750 }
56070ea4 1751
3fe0b570 1752 return nentries;
e7c9f3ff 1753}
52724a0e 1754
055cc3e5
AB
1755static int handle_pfds_netclient(struct tracer *tp, int nevs, int force_read)
1756{
1757 struct stat sb;
1758 int i, nentries = 0;
055cc3e5
AB
1759 struct pollfd *pfd = tp->pfds;
1760 struct io_info *iop = tp->ios;
1761
62d712a7 1762 for (i = 0; i < ndevs; i++, pfd++, iop++) {
055cc3e5
AB
1763 if (pfd->revents & POLLIN || force_read) {
1764 if (fstat(iop->ifd, &sb) < 0) {
1765 perror(iop->ifn);
1766 pfd->events = 0;
1767 } else if (sb.st_size > (off_t)iop->data_queued) {
1768 iop->ready = sb.st_size - iop->data_queued;
1769 iop->data_queued = sb.st_size;
1770
1771 if (!net_sendfile_data(tp, iop)) {
1772 pdc_dr_update(iop->dpp, tp->cpu,
1773 iop->ready);
1774 nentries++;
1775 } else
1776 clear_events(pfd);
1777 }
1778 if (--nevs == 0)
1779 break;
1780 }
1781 }
1782
1783 if (nentries)
1784 incr_entries(nentries);
1785
1786 return nentries;
1787}
1788
1789static int handle_pfds_entries(struct tracer *tp, int nevs, int force_read)
1790{
1791 int i, nentries = 0;
1792 struct trace_buf *tbp;
1793 struct pollfd *pfd = tp->pfds;
1794 struct io_info *iop = tp->ios;
1795
1796 tbp = alloc_trace_buf(tp->cpu, buf_size);
1797 for (i = 0; i < ndevs; i++, pfd++, iop++) {
1798 if (pfd->revents & POLLIN || force_read) {
1799 tbp->len = read(iop->ifd, tbp->buf, buf_size);
1800 if (tbp->len > 0) {
1801 pdc_dr_update(iop->dpp, tp->cpu, tbp->len);
1802 add_trace_buf(iop->dpp, tp->cpu, &tbp);
1803 nentries++;
1804 } else if (tbp->len == 0) {
1805 /*
1806 * Short reads after we're done stop us
1807 * from trying reads.
1808 */
1809 if (tp->is_done)
1810 clear_events(pfd);
1811 } else {
1812 read_err(tp->cpu, iop->ifn);
1813 if (errno != EAGAIN || tp->is_done)
1814 clear_events(pfd);
1815 }
1816 if (!piped_output && --nevs == 0)
1817 break;
1818 }
1819 }
1820 free(tbp);
1821
1822 if (nentries)
1823 incr_entries(nentries);
1824
1825 return nentries;
1826}
1827
3fe0b570 1828static void *thread_main(void *arg)
8e86c98a 1829{
df81fdb5 1830 int ret, ndone, to_val;
3fe0b570 1831 struct tracer *tp = arg;
8e86c98a 1832
3fe0b570
AB
1833 ret = lock_on_cpu(tp->cpu);
1834 if (ret)
1835 goto err;
ff11d54c 1836
3fe0b570 1837 ret = open_ios(tp);
df81fdb5 1838 if (ret)
3fe0b570 1839 goto err;
6a6d3f0f 1840
3fe0b570
AB
1841 if (piped_output)
1842 to_val = 50; /* Frequent partial handles */
ff11d54c 1843 else
3fe0b570
AB
1844 to_val = 500; /* 1/2 second intervals */
1845
df81fdb5
AB
1846
1847 tracer_signal_ready(tp, Th_running, 0);
1848 tracer_wait_unblock(tp);
6488ca48 1849
3fe0b570
AB
1850 while (!tp->is_done) {
1851 ndone = poll(tp->pfds, ndevs, to_val);
1852 if (ndone || piped_output)
1853 (void)handle_pfds(tp, ndone, piped_output);
1854 else if (ndone < 0 && errno != EINTR)
1855 fprintf(stderr, "Thread %d poll failed: %d/%s\n",
1856 tp->cpu, errno, strerror(errno));
1857 }
22cd0c02
JA
1858
1859 /*
3fe0b570 1860 * Trace is stopped, pull data until we get a short read
22cd0c02 1861 */
3fe0b570
AB
1862 while (handle_pfds(tp, ndevs, 1) > 0)
1863 ;
055cc3e5 1864
3fe0b570 1865 close_ios(tp);
df81fdb5
AB
1866 tracer_signal_ready(tp, Th_leaving, 0);
1867 return NULL;
8e86c98a 1868
3fe0b570 1869err:
df81fdb5 1870 tracer_signal_ready(tp, Th_error, ret);
3fe0b570 1871 return NULL;
22cd0c02
JA
1872}
1873
3fe0b570 1874static int start_tracer(int cpu)
22cd0c02 1875{
3fe0b570 1876 struct tracer *tp;
22cd0c02 1877
3fe0b570
AB
1878 tp = malloc(sizeof(*tp));
1879 memset(tp, 0, sizeof(*tp));
7ab2f837 1880
3fe0b570 1881 INIT_LIST_HEAD(&tp->head);
3fe0b570
AB
1882 tp->status = 0;
1883 tp->cpu = cpu;
8e86c98a 1884
3fe0b570
AB
1885 if (pthread_create(&tp->thread, NULL, thread_main, tp)) {
1886 fprintf(stderr, "FAILED to start thread on CPU %d: %d/%s\n",
1887 cpu, errno, strerror(errno));
df81fdb5
AB
1888 free(tp);
1889 return 1;
8e86c98a 1890 }
3fe0b570 1891
df81fdb5
AB
1892 list_add_tail(&tp->head, &tracers);
1893 return 0;
8e86c98a
JA
1894}
1895
df81fdb5 1896static void start_tracers(void)
e0a1988b 1897{
d045a704 1898 int cpu, started = 0;
df81fdb5 1899 struct list_head *p;
d045a704 1900 size_t alloc_size = CPU_ALLOC_SIZE(max_cpus);
3fe0b570 1901
d045a704
JK
1902 for (cpu = 0; cpu < max_cpus; cpu++) {
1903 if (!CPU_ISSET_S(cpu, alloc_size, online_cpus))
1904 continue;
3fe0b570
AB
1905 if (start_tracer(cpu))
1906 break;
d045a704
JK
1907 started++;
1908 }
e0a1988b 1909
d045a704 1910 wait_tracers_ready(started);
df81fdb5
AB
1911
1912 __list_for_each(p, &tracers) {
1913 struct tracer *tp = list_entry(p, struct tracer, head);
1914 if (tp->status)
1915 fprintf(stderr,
1916 "FAILED to start thread on CPU %d: %d/%s\n",
1917 tp->cpu, tp->status, strerror(tp->status));
1918 }
3fe0b570 1919}
e0a1988b 1920
3fe0b570
AB
1921static void stop_tracers(void)
1922{
1923 struct list_head *p;
e0a1988b
JA
1924
1925 /*
3fe0b570 1926 * Stop the tracing - makes the tracer threads clean up quicker.
e0a1988b 1927 */
3fe0b570
AB
1928 __list_for_each(p, &devpaths) {
1929 struct devpath *dpp = list_entry(p, struct devpath, head);
1930 (void)ioctl(dpp->fd, BLKTRACESTOP);
e0a1988b
JA
1931 }
1932
3fe0b570
AB
1933 /*
1934 * Tell each tracer to quit
1935 */
1936 __list_for_each(p, &tracers) {
1937 struct tracer *tp = list_entry(p, struct tracer, head);
1938 tp->is_done = 1;
1939 }
838361c6 1940 pthread_cond_broadcast(&mt_cond);
ff11d54c 1941}
e0a1988b 1942
3fe0b570 1943static void del_tracers(void)
ff11d54c 1944{
3fe0b570 1945 struct list_head *p, *q;
ff11d54c 1946
3fe0b570
AB
1947 list_for_each_safe(p, q, &tracers) {
1948 struct tracer *tp = list_entry(p, struct tracer, head);
ff11d54c 1949
3fe0b570
AB
1950 list_del(&tp->head);
1951 free(tp);
e0a1988b 1952 }
ff11d54c 1953}
e0a1988b 1954
3fe0b570 1955static void wait_tracers(void)
ff11d54c 1956{
3fe0b570 1957 struct list_head *p;
ff11d54c 1958
3fe0b570
AB
1959 if (use_tracer_devpaths())
1960 process_trace_bufs();
1961
df81fdb5
AB
1962 wait_tracers_leaving();
1963
3fe0b570
AB
1964 __list_for_each(p, &tracers) {
1965 int ret;
1966 struct tracer *tp = list_entry(p, struct tracer, head);
1967
3fe0b570
AB
1968 ret = pthread_join(tp->thread, NULL);
1969 if (ret)
1970 fprintf(stderr, "Thread join %d failed %d\n",
1971 tp->cpu, ret);
ff11d54c
TZ
1972 }
1973
3fe0b570
AB
1974 if (use_tracer_devpaths())
1975 clean_trace_bufs();
1976
1977 get_all_drops();
ff11d54c
TZ
1978}
1979
/*
 * Shut tracing down: ignore SIGINT, SIGHUP, SIGTERM and SIGALRM from
 * here on, then stop the tracers, wait for them, free them and release
 * all devices.
 */
static void exit_tracing(void)
{
	static const int ignored[] = { SIGINT, SIGHUP, SIGTERM, SIGALRM };
	unsigned int i;

	for (i = 0; i < sizeof(ignored) / sizeof(ignored[0]); i++)
		signal(ignored[i], SIG_IGN);

	stop_tracers();
	wait_tracers();
	del_tracers();
	rel_devpaths();
}
1992
3fe0b570 1993static void handle_sigint(__attribute__((__unused__)) int sig)
8e86c98a 1994{
3fe0b570
AB
1995 done = 1;
1996 stop_tracers();
8e86c98a
JA
1997}
1998
3fe0b570 1999static void show_stats(struct list_head *devpaths)
659bcc3f 2000{
3fe0b570
AB
2001 FILE *ofp;
2002 struct list_head *p;
2003 unsigned long long nevents, data_read;
2004 unsigned long long total_drops = 0;
2005 unsigned long long total_events = 0;
2006
2007 if (piped_output)
2008 ofp = my_fopen("/dev/null", "w");
2009 else
2010 ofp = stdout;
ff11d54c 2011
3fe0b570
AB
2012 __list_for_each(p, devpaths) {
2013 int cpu;
2014 struct pdc_stats *sp;
2015 struct devpath *dpp = list_entry(p, struct devpath, head);
e0a1988b 2016
3fe0b570
AB
2017 if (net_mode == Net_server)
2018 printf("server: end of run for %s:%s\n",
2019 dpp->ch->hostname, dpp->buts_name);
e0a1988b 2020
3fe0b570
AB
2021 data_read = 0;
2022 nevents = 0;
2023
2024 fprintf(ofp, "=== %s ===\n", dpp->buts_name);
2025 for (cpu = 0, sp = dpp->stats; cpu < dpp->ncpus; cpu++, sp++) {
2026 /*
2027 * Estimate events if not known...
2028 */
2029 if (sp->nevents == 0) {
2030 sp->nevents = sp->data_read /
2031 sizeof(struct blk_io_trace);
ff11d54c 2032 }
e0a1988b 2033
3fe0b570
AB
2034 fprintf(ofp,
2035 " CPU%3d: %20llu events, %8llu KiB data\n",
2036 cpu, sp->nevents, (sp->data_read + 1023) >> 10);
e0a1988b 2037
3fe0b570
AB
2038 data_read += sp->data_read;
2039 nevents += sp->nevents;
e0a1988b
JA
2040 }
2041
3fe0b570
AB
2042 fprintf(ofp, " Total: %20llu events (dropped %llu),"
2043 " %8llu KiB data\n", nevents,
2044 dpp->drops, (data_read + 1024) >> 10);
8e86c98a 2045
3fe0b570
AB
2046 total_drops += dpp->drops;
2047 total_events += (nevents + dpp->drops);
8e86c98a
JA
2048 }
2049
3fe0b570
AB
2050 fflush(ofp);
2051 if (piped_output)
2052 fclose(ofp);
8e86c98a 2053
3fe0b570
AB
2054 if (total_drops) {
2055 double drops_ratio = 1.0;
8e86c98a 2056
3fe0b570
AB
2057 if (total_events)
2058 drops_ratio = (double)total_drops/(double)total_events;
8e86c98a 2059
3fe0b570
AB
2060 fprintf(stderr, "\nYou have %llu (%5.1lf%%) dropped events\n"
2061 "Consider using a larger buffer size (-b) "
2062 "and/or more buffers (-n)\n",
2063 total_drops, 100.0 * drops_ratio);
8e86c98a 2064 }
8e86c98a
JA
2065}
2066
3fe0b570 2067static int handle_args(int argc, char *argv[])
8e86c98a 2068{
3fe0b570 2069 int c, i;
e3e74029 2070 struct statfs st;
d39c04ca
AB
2071 int act_mask_tmp = 0;
2072
2073 while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) >= 0) {
2074 switch (c) {
2075 case 'a':
2076 i = find_mask_map(optarg);
2077 if (i < 0) {
3fe0b570 2078 fprintf(stderr, "Invalid action mask %s\n",
d39c04ca 2079 optarg);
7425d456 2080 return 1;
d39c04ca
AB
2081 }
2082 act_mask_tmp |= i;
2083 break;
2084
2085 case 'A':
3fe0b570 2086 if ((sscanf(optarg, "%x", &i) != 1) ||
98f8386b 2087 !valid_act_opt(i)) {
d39c04ca 2088 fprintf(stderr,
ab197ca7 2089 "Invalid set action mask %s/0x%x\n",
d39c04ca 2090 optarg, i);
7425d456 2091 return 1;
d39c04ca
AB
2092 }
2093 act_mask_tmp = i;
2094 break;
d0ca268b 2095
d39c04ca 2096 case 'd':
3fe0b570 2097 if (add_devpath(optarg) != 0)
e7c9f3ff 2098 return 1;
d39c04ca
AB
2099 break;
2100
cf1edb17
AB
2101 case 'I': {
2102 char dev_line[256];
3fe0b570 2103 FILE *ifp = my_fopen(optarg, "r");
cf1edb17
AB
2104
2105 if (!ifp) {
3fe0b570
AB
2106 fprintf(stderr,
2107 "Invalid file for devices %s\n",
cf1edb17
AB
2108 optarg);
2109 return 1;
2110 }
2111
f9a89a6f
ES
2112 while (fscanf(ifp, "%s\n", dev_line) == 1) {
2113 if (add_devpath(dev_line) != 0) {
2114 fclose(ifp);
cf1edb17 2115 return 1;
f9a89a6f
ES
2116 }
2117 }
2118 fclose(ifp);
cf1edb17
AB
2119 break;
2120 }
cf1edb17 2121
5270dddd 2122 case 'r':
3d06efea 2123 debugfs_path = optarg;
5270dddd
JA
2124 break;
2125
d5396421 2126 case 'o':
66efebf8 2127 output_name = optarg;
d5396421 2128 break;
bc39777c
JA
2129 case 'k':
2130 kill_running_trace = 1;
2131 break;
ece238a6
NS
2132 case 'w':
2133 stop_watch = atoi(optarg);
2134 if (stop_watch <= 0) {
2135 fprintf(stderr,
2136 "Invalid stopwatch value (%d secs)\n",
2137 stop_watch);
2138 return 1;
2139 }
2140 break;
57ea8602 2141 case 'V':
5d4f19d9 2142 case 'v':
52724a0e 2143 printf("%s version %s\n", argv[0], blktrace_version);
3fe0b570
AB
2144 exit(0);
2145 /*NOTREACHED*/
129aa440 2146 case 'b':
eb3c8108 2147 buf_size = strtoul(optarg, NULL, 10);
183a0855 2148 if (buf_size <= 0 || buf_size > 16*1024) {
3fe0b570
AB
2149 fprintf(stderr, "Invalid buffer size (%lu)\n",
2150 buf_size);
129aa440
JA
2151 return 1;
2152 }
2153 buf_size <<= 10;
2154 break;
2155 case 'n':
eb3c8108 2156 buf_nr = strtoul(optarg, NULL, 10);
129aa440
JA
2157 if (buf_nr <= 0) {
2158 fprintf(stderr,
eb3c8108 2159 "Invalid buffer nr (%lu)\n", buf_nr);
129aa440
JA
2160 return 1;
2161 }
2162 break;
d1d7f15f
JA
2163 case 'D':
2164 output_dir = optarg;
2165 break;
8e86c98a
JA
2166 case 'h':
2167 net_mode = Net_client;
d324757e
ES
2168 memset(hostname, 0, sizeof(hostname));
2169 strncpy(hostname, optarg, sizeof(hostname));
2170 hostname[sizeof(hostname) - 1] = '\0';
8e86c98a
JA
2171 break;
2172 case 'l':
2173 net_mode = Net_server;
2174 break;
2175 case 'p':
2176 net_port = atoi(optarg);
2177 break;
32f18c48 2178 case 's':
79971f43 2179 net_use_sendfile = 0;
32f18c48 2180 break;
d39c04ca 2181 default:
ee1f4158 2182 show_usage(argv[0]);
3fe0b570
AB
2183 exit(1);
2184 /*NOTREACHED*/
d39c04ca
AB
2185 }
2186 }
2187
3fe0b570
AB
2188 while (optind < argc)
2189 if (add_devpath(argv[optind++]) != 0)
2190 return 1;
8e86c98a 2191
3fe0b570
AB
2192 if (net_mode != Net_server && ndevs == 0) {
2193 show_usage(argv[0]);
2194 return 1;
2195 }
8e86c98a 2196
d8365957 2197 if (statfs(debugfs_path, &st) < 0) {
3fe0b570
AB
2198 fprintf(stderr, "Invalid debug path %s: %d/%s\n",
2199 debugfs_path, errno, strerror(errno));
2200 return 1;
2201 }
2202
d8365957
MP
2203 if (st.f_type != (long)DEBUGFS_TYPE) {
2204 fprintf(stderr, "Debugfs is not mounted at %s\n", debugfs_path);
2205 return 1;
2206 }
2207
3fe0b570
AB
2208 if (act_mask_tmp != 0)
2209 act_mask = act_mask_tmp;
2210
e58f3937
AB
2211 if (net_mode == Net_client && net_setup_addr())
2212 return 1;
2213
3fe0b570
AB
2214 /*
2215 * Set up for appropriate PFD handler based upon output name.
2216 */
2217 if (net_client_use_sendfile())
2218 handle_pfds = handle_pfds_netclient;
2219 else if (net_client_use_send())
2220 handle_pfds = handle_pfds_entries;
2221 else if (output_name && (strcmp(output_name, "-") == 0)) {
2222 piped_output = 1;
2223 handle_pfds = handle_pfds_entries;
2224 pfp = stdout;
ae2dc05e
ES
2225 if (setvbuf(pfp, NULL, _IONBF, 0)) {
2226 perror("setvbuf stdout");
2227 return 1;
2228 }
3fe0b570
AB
2229 } else
2230 handle_pfds = handle_pfds_file;
2231 return 0;
2232}
2233
2234static void ch_add_connection(struct net_server_s *ns, struct cl_host *ch,
2235 int fd)
2236{
2237 struct cl_conn *nc;
2238
2239 nc = malloc(sizeof(*nc));
2240 memset(nc, 0, sizeof(*nc));
2241
2242 time(&nc->connect_time);
2243 nc->ch = ch;
2244 nc->fd = fd;
2245 nc->ncpus = -1;
2246
2247 list_add_tail(&nc->ch_head, &ch->conn_list);
2248 ch->connects++;
2249
2250 list_add_tail(&nc->ns_head, &ns->conn_list);
2251 ns->connects++;
2252 ns->pfds = realloc(ns->pfds, (ns->connects+1) * sizeof(struct pollfd));
2253}
2254
2255static void ch_rem_connection(struct net_server_s *ns, struct cl_host *ch,
2256 struct cl_conn *nc)
2257{
2258 net_close_connection(&nc->fd);
2259
2260 list_del(&nc->ch_head);
2261 ch->connects--;
2262
2263 list_del(&nc->ns_head);
2264 ns->connects--;
2265 ns->pfds = realloc(ns->pfds, (ns->connects+1) * sizeof(struct pollfd));
2266
2267 free(nc);
2268}
2269
2270static struct cl_host *net_find_client_host(struct net_server_s *ns,
2271 struct in_addr cl_in_addr)
2272{
2273 struct list_head *p;
2274
2275 __list_for_each(p, &ns->ch_list) {
2276 struct cl_host *ch = list_entry(p, struct cl_host, head);
2277
2278 if (in_addr_eq(ch->cl_in_addr, cl_in_addr))
2279 return ch;
2280 }
2281
2282 return NULL;
2283}
2284
2285static struct cl_host *net_add_client_host(struct net_server_s *ns,
2286 struct sockaddr_in *addr)
2287{
2288 struct cl_host *ch;
2289
2290 ch = malloc(sizeof(*ch));
2291 memset(ch, 0, sizeof(*ch));
2292
2293 ch->ns = ns;
2294 ch->cl_in_addr = addr->sin_addr;
2295 list_add_tail(&ch->head, &ns->ch_list);
2296 ns->nchs++;
ec685dd2 2297
3fe0b570
AB
2298 ch->hostname = strdup(inet_ntoa(addr->sin_addr));
2299 printf("server: connection from %s\n", ch->hostname);
2300
2301 INIT_LIST_HEAD(&ch->conn_list);
2302 INIT_LIST_HEAD(&ch->devpaths);
2303
2304 return ch;
2305}
2306
2307static void device_done(struct devpath *dpp, int ncpus)
2308{
2309 int cpu;
2310 struct io_info *iop;
2311
2312 for (cpu = 0, iop = dpp->ios; cpu < ncpus; cpu++, iop++)
2313 close_iop(iop);
2314
2315 list_del(&dpp->head);
2316 dpp_free(dpp);
2317}
2318
2319static void net_ch_remove(struct cl_host *ch, int ncpus)
2320{
2321 struct list_head *p, *q;
2322 struct net_server_s *ns = ch->ns;
2323
2324 list_for_each_safe(p, q, &ch->devpaths) {
2325 struct devpath *dpp = list_entry(p, struct devpath, head);
2326 device_done(dpp, ncpus);
ec685dd2 2327 }
8e86c98a 2328
3fe0b570
AB
2329 list_for_each_safe(p, q, &ch->conn_list) {
2330 struct cl_conn *nc = list_entry(p, struct cl_conn, ch_head);
2331
2332 ch_rem_connection(ns, ch, nc);
22cd0c02
JA
2333 }
2334
3fe0b570
AB
2335 list_del(&ch->head);
2336 ns->nchs--;
2337
2338 if (ch->hostname)
2339 free(ch->hostname);
2340 free(ch);
2341}
2342
2343static void net_add_connection(struct net_server_s *ns)
2344{
2345 int fd;
2346 struct cl_host *ch;
2347 socklen_t socklen = sizeof(ns->addr);
2348
d5302b03 2349 fd = my_accept(ns->listen_fd, (struct sockaddr *)&ns->addr, &socklen);
3fe0b570
AB
2350 if (fd < 0) {
2351 /*
2352 * This is OK: we just won't accept this connection,
2353 * nothing fatal.
2354 */
2355 perror("accept");
2356 } else {
2357 ch = net_find_client_host(ns, ns->addr.sin_addr);
2358 if (!ch)
2359 ch = net_add_client_host(ns, &ns->addr);
2360
2361 ch_add_connection(ns, ch, fd);
d39c04ca 2362 }
3fe0b570 2363}
d39c04ca 2364
3fe0b570
AB
2365static struct devpath *nc_add_dpp(struct cl_conn *nc,
2366 struct blktrace_net_hdr *bnh,
2367 time_t connect_time)
2368{
2369 int cpu;
2370 struct io_info *iop;
2371 struct devpath *dpp;
2372
2373 dpp = malloc(sizeof(*dpp));
2374 memset(dpp, 0, sizeof(*dpp));
2375
2376 dpp->buts_name = strdup(bnh->buts_name);
2377 dpp->path = strdup(bnh->buts_name);
2378 dpp->fd = -1;
2379 dpp->ch = nc->ch;
2380 dpp->cl_id = bnh->cl_id;
2381 dpp->cl_connect_time = connect_time;
2382 dpp->ncpus = nc->ncpus;
2383 dpp->stats = calloc(dpp->ncpus, sizeof(*dpp->stats));
2384 memset(dpp->stats, 0, dpp->ncpus * sizeof(*dpp->stats));
2385
2386 list_add_tail(&dpp->head, &nc->ch->devpaths);
2387 nc->ch->ndevs++;
2388
2389 dpp->ios = calloc(nc->ncpus, sizeof(*iop));
2390 memset(dpp->ios, 0, ndevs * sizeof(*iop));
2391
2392 for (cpu = 0, iop = dpp->ios; cpu < nc->ncpus; cpu++, iop++) {
2393 iop->dpp = dpp;
2394 iop->nc = nc;
2395 init_mmap_info(&iop->mmap_info);
2396
2397 if (iop_open(iop, cpu))
2398 goto err;
69dd57c2
AB
2399 }
2400
3fe0b570 2401 return dpp;
69dd57c2 2402
3fe0b570
AB
2403err:
2404 /*
2405 * Need to unravel what's been done...
2406 */
2407 while (cpu >= 0)
2408 close_iop(&dpp->ios[cpu--]);
2409 dpp_free(dpp);
2410
2411 return NULL;
2412}
d0ca268b 2413
3fe0b570
AB
2414static struct devpath *nc_find_dpp(struct cl_conn *nc,
2415 struct blktrace_net_hdr *bnh)
2416{
2417 struct list_head *p;
2418 time_t connect_time = nc->connect_time;
3d06efea 2419
3fe0b570
AB
2420 __list_for_each(p, &nc->ch->devpaths) {
2421 struct devpath *dpp = list_entry(p, struct devpath, head);
2422
2423 if (!strcmp(dpp->buts_name, bnh->buts_name))
2424 return dpp;
2425
2426 if (dpp->cl_id == bnh->cl_id)
2427 connect_time = dpp->cl_connect_time;
d0ca268b
JA
2428 }
2429
3fe0b570
AB
2430 return nc_add_dpp(nc, bnh, connect_time);
2431}
bc39777c 2432
3fe0b570
AB
2433static void net_client_read_data(struct cl_conn *nc, struct devpath *dpp,
2434 struct blktrace_net_hdr *bnh)
2435{
2436 int ret;
2437 struct io_info *iop = &dpp->ios[bnh->cpu];
2438 struct mmap_info *mip = &iop->mmap_info;
2439
ae7c049d 2440 if (setup_mmap(iop->ofd, bnh->len, &iop->mmap_info, NULL)) {
3fe0b570
AB
2441 fprintf(stderr, "ncd(%s:%d): mmap failed\n",
2442 nc->ch->hostname, nc->fd);
2443 exit(1);
2444 }
2445
2446 ret = net_recv_data(nc->fd, mip->fs_buf + mip->fs_off, bnh->len);
2447 if (ret > 0) {
2448 pdc_dr_update(dpp, bnh->cpu, ret);
2449 mip->fs_size += ret;
2450 mip->fs_off += ret;
2451 } else if (ret < 0)
2452 exit(1);
2453}
2454
2455/*
2456 * Returns 1 if we closed a host - invalidates other polling information
2457 * that may be present.
2458 */
2459static int net_client_data(struct cl_conn *nc)
2460{
2461 int ret;
2462 struct devpath *dpp;
2463 struct blktrace_net_hdr bnh;
2464
2465 ret = net_get_header(nc, &bnh);
2466 if (ret == 0)
7425d456 2467 return 0;
3fe0b570
AB
2468
2469 if (ret < 0) {
2470 fprintf(stderr, "ncd(%d): header read failed\n", nc->fd);
2471 exit(1);
2472 }
2473
2474 if (data_is_native == -1 && check_data_endianness(bnh.magic)) {
2475 fprintf(stderr, "ncd(%d): received data is bad\n", nc->fd);
2476 exit(1);
2477 }
2478
2479 if (!data_is_native) {
2480 bnh.magic = be32_to_cpu(bnh.magic);
2481 bnh.cpu = be32_to_cpu(bnh.cpu);
2482 bnh.max_cpus = be32_to_cpu(bnh.max_cpus);
2483 bnh.len = be32_to_cpu(bnh.len);
2484 bnh.cl_id = be32_to_cpu(bnh.cl_id);
2485 bnh.buf_size = be32_to_cpu(bnh.buf_size);
2486 bnh.buf_nr = be32_to_cpu(bnh.buf_nr);
2487 bnh.page_size = be32_to_cpu(bnh.page_size);
2488 }
2489
2490 if ((bnh.magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) {
2491 fprintf(stderr, "ncd(%s:%d): bad data magic\n",
2492 nc->ch->hostname, nc->fd);
2493 exit(1);
2494 }
2495
2496 if (nc->ncpus == -1)
2497 nc->ncpus = bnh.max_cpus;
2498
2499 /*
2500 * len == 0 means the other end is sending us a new connection/dpp
2501 * len == 1 means that the other end signalled end-of-run
2502 */
2503 dpp = nc_find_dpp(nc, &bnh);
2504 if (bnh.len == 0) {
2505 /*
2506 * Just adding in the dpp above is enough
2507 */
2508 ack_open_close(nc->fd, dpp->buts_name);
2509 nc->ch->cl_opens++;
2510 } else if (bnh.len == 1) {
2511 /*
2512 * overload cpu count with dropped events
2513 */
2514 dpp->drops = bnh.cpu;
2515
2516 ack_open_close(nc->fd, dpp->buts_name);
2517 if (--nc->ch->cl_opens == 0) {
2518 show_stats(&nc->ch->devpaths);
2519 net_ch_remove(nc->ch, nc->ncpus);
2520 return 1;
2521 }
2522 } else
2523 net_client_read_data(nc, dpp, &bnh);
2524
2525 return 0;
2526}
2527
2528static void handle_client_data(struct net_server_s *ns, int events)
2529{
2530 struct cl_conn *nc;
2531 struct pollfd *pfd;
2532 struct list_head *p, *q;
2533
2534 pfd = &ns->pfds[1];
2535 list_for_each_safe(p, q, &ns->conn_list) {
2536 if (pfd->revents & POLLIN) {
2537 nc = list_entry(p, struct cl_conn, ns_head);
2538
2539 if (net_client_data(nc) || --events == 0)
2540 break;
2541 }
2542 pfd++;
2543 }
2544}
2545
2546static void net_setup_pfds(struct net_server_s *ns)
2547{
2548 struct pollfd *pfd;
2549 struct list_head *p;
2550
2551 ns->pfds[0].fd = ns->listen_fd;
2552 ns->pfds[0].events = POLLIN;
2553
2554 pfd = &ns->pfds[1];
2555 __list_for_each(p, &ns->conn_list) {
2556 struct cl_conn *nc = list_entry(p, struct cl_conn, ns_head);
2557
2558 pfd->fd = nc->fd;
2559 pfd->events = POLLIN;
2560 pfd++;
2561 }
2562}
2563
2564static int net_server_handle_connections(struct net_server_s *ns)
2565{
2566 int events;
2567
2568 printf("server: waiting for connections...\n");
2569
2570 while (!done) {
2571 net_setup_pfds(ns);
2572 events = poll(ns->pfds, ns->connects + 1, -1);
2573 if (events < 0) {
2574 if (errno != EINTR) {
2575 perror("FATAL: poll error");
2576 return 1;
2577 }
2578 } else if (events > 0) {
2579 if (ns->pfds[0].revents & POLLIN) {
2580 net_add_connection(ns);
2581 events--;
2582 }
2583
2584 if (events)
2585 handle_client_data(ns, events);
2586 }
2587 }
2588
2589 return 0;
2590}
2591
2592static int net_server(void)
2593{
2594 int fd, opt;
2595 int ret = 1;
2596 struct net_server_s net_server;
2597 struct net_server_s *ns = &net_server;
2598
2599 memset(ns, 0, sizeof(*ns));
2600 INIT_LIST_HEAD(&ns->ch_list);
2601 INIT_LIST_HEAD(&ns->conn_list);
2602 ns->pfds = malloc(sizeof(struct pollfd));
2603
2604 fd = my_socket(AF_INET, SOCK_STREAM, 0);
2605 if (fd < 0) {
2606 perror("server: socket");
2607 goto out;
2608 }
2609
2610 opt = 1;
2611 if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) < 0) {
2612 perror("setsockopt");
2613 goto out;
2614 }
2615
2616 memset(&ns->addr, 0, sizeof(ns->addr));
2617 ns->addr.sin_family = AF_INET;
2618 ns->addr.sin_addr.s_addr = htonl(INADDR_ANY);
2619 ns->addr.sin_port = htons(net_port);
2620
2621 if (bind(fd, (struct sockaddr *) &ns->addr, sizeof(ns->addr)) < 0) {
2622 perror("bind");
2623 goto out;
2624 }
2625
2626 if (listen(fd, 1) < 0) {
2627 perror("listen");
2628 goto out;
2629 }
2630
2631 /*
2632 * The actual server looping is done here:
2633 */
2634 ns->listen_fd = fd;
2635 ret = net_server_handle_connections(ns);
2636
2637 /*
2638 * Clean up and return...
2639 */
2640out:
2641 free(ns->pfds);
2642 return ret;
2643}
2644
055cc3e5
AB
2645static int run_tracers(void)
2646{
2647 atexit(exit_tracing);
2648 if (net_mode == Net_client)
2649 printf("blktrace: connecting to %s\n", hostname);
2650
2651 setup_buts();
2652
2653 if (use_tracer_devpaths()) {
2654 if (setup_tracer_devpaths())
2655 return 1;
2656
2657 if (piped_output)
2658 handle_list = handle_list_file;
2659 else
2660 handle_list = handle_list_net;
2661 }
2662
2663 start_tracers();
2664 if (nthreads_running == ncpus) {
2665 unblock_tracers();
2666 start_buts();
2667 if (net_mode == Net_client)
2668 printf("blktrace: connected!\n");
2669 if (stop_watch)
2670 alarm(stop_watch);
2671 } else
2672 stop_tracers();
2673
2674 wait_tracers();
2675 if (nthreads_running == ncpus)
2676 show_stats(&devpaths);
2677 if (net_client_use_send())
2678 close_client_connections();
2679 del_tracers();
2680
2681 return 0;
2682}
2683
d045a704
JK
2684static cpu_set_t *get_online_cpus(void)
2685{
2686 FILE *cpus;
2687 cpu_set_t *set;
2688 size_t alloc_size;
2689 int cpuid, prevcpuid = -1;
2690 char nextch;
2691 int n, ncpu, curcpu = 0;
2692 int *cpu_nums;
2693
2694 ncpu = sysconf(_SC_NPROCESSORS_CONF);
2695 if (ncpu < 0)
2696 return NULL;
2697
2698 cpu_nums = malloc(sizeof(int)*ncpu);
2699 if (!cpu_nums) {
2700 errno = ENOMEM;
2701 return NULL;
2702 }
2703
2704 /*
2705 * There is no way to easily get maximum CPU number. So we have to
2706 * parse the file first to find it out and then create appropriate
2707 * cpuset
2708 */
2709 cpus = my_fopen("/sys/devices/system/cpu/online", "r");
2710 for (;;) {
2711 n = fscanf(cpus, "%d%c", &cpuid, &nextch);
2712 if (n <= 0)
2713 break;
2714 if (n == 2 && nextch == '-') {
2715 prevcpuid = cpuid;
2716 continue;
2717 }
2718 if (prevcpuid == -1)
2719 prevcpuid = cpuid;
2720 while (prevcpuid <= cpuid) {
2721 /* More CPUs listed than configured? */
2722 if (curcpu >= ncpu) {
2723 errno = EINVAL;
2724 return NULL;
2725 }
2726 cpu_nums[curcpu++] = prevcpuid++;
2727 }
2728 prevcpuid = -1;
2729 }
2730 fclose(cpus);
2731
2732 ncpu = curcpu;
2733 max_cpus = cpu_nums[ncpu - 1] + 1;
2734
2735 /* Now that we have maximum cpu number, create a cpuset */
2736 set = CPU_ALLOC(max_cpus);
2737 if (!set) {
2738 errno = ENOMEM;
2739 return NULL;
2740 }
2741 alloc_size = CPU_ALLOC_SIZE(max_cpus);
2742 CPU_ZERO_S(alloc_size, set);
2743
2744 for (curcpu = 0; curcpu < ncpu; curcpu++)
2745 CPU_SET_S(cpu_nums[curcpu], alloc_size, set);
2746
2747 free(cpu_nums);
2748
2749 return set;
2750}
2751
3fe0b570
AB
2752int main(int argc, char *argv[])
2753{
2754 int ret = 0;
2755
2756 setlocale(LC_NUMERIC, "en_US");
2757 pagesize = getpagesize();
d045a704
JK
2758 online_cpus = get_online_cpus();
2759 if (!online_cpus) {
2760 fprintf(stderr, "cannot get online cpus %d/%s\n",
3fe0b570
AB
2761 errno, strerror(errno));
2762 ret = 1;
2763 goto out;
055cc3e5 2764 } else if (handle_args(argc, argv)) {
3fe0b570
AB
2765 ret = 1;
2766 goto out;
bc39777c
JA
2767 }
2768
d045a704 2769 ncpus = CPU_COUNT_S(CPU_ALLOC_SIZE(max_cpus), online_cpus);
ce2151eb
AB
2770 if (ndevs > 1 && output_name && strcmp(output_name, "-") != 0) {
2771 fprintf(stderr, "-o not supported with multiple devices\n");
2772 ret = 1;
2773 goto out;
2774 }
2775
d0ca268b
JA
2776 signal(SIGINT, handle_sigint);
2777 signal(SIGHUP, handle_sigint);
2778 signal(SIGTERM, handle_sigint);
ece238a6 2779 signal(SIGALRM, handle_sigint);
38e1f0c6 2780 signal(SIGPIPE, SIG_IGN);
d0ca268b 2781
3fe0b570
AB
2782 if (kill_running_trace) {
2783 struct devpath *dpp;
2784 struct list_head *p;
8e86c98a 2785
3fe0b570
AB
2786 __list_for_each(p, &devpaths) {
2787 dpp = list_entry(p, struct devpath, head);
2788 if (__stop_trace(dpp->fd)) {
2789 fprintf(stderr,
2790 "BLKTRACETEARDOWN %s failed: %d/%s\n",
2791 dpp->path, errno, strerror(errno));
2792 }
2793 }
2794 } else if (net_mode == Net_server) {
2795 if (output_name) {
2796 fprintf(stderr, "-o ignored in server mode\n");
2797 output_name = NULL;
2798 }
3fe0b570 2799 ret = net_server();
055cc3e5
AB
2800 } else
2801 ret = run_tracers();
d0ca268b 2802
3fe0b570
AB
2803out:
2804 if (pfp)
2805 fclose(pfp);
2806 rel_devpaths();
2807 return ret;
2808}