X-Git-Url: https://git.kernel.dk/?a=blobdiff_plain;f=blktrace.c;h=d0d271f9add5106b4481321058787f1ba1aa82a4;hb=e63098f39398bde67be9b64a49deece1c60614df;hp=fa080718f97be6271ee49461423008ac7887e102;hpb=1c99bc2122136be9d3f812fd7e590f71fec378a1;p=blktrace.git diff --git a/blktrace.c b/blktrace.c index fa08071..d0d271f 100644 --- a/blktrace.c +++ b/blktrace.c @@ -2,6 +2,10 @@ * block queue tracing application * * Copyright (C) 2005 Jens Axboe + * Copyright (C) 2006 Jens Axboe + * + * Rewrite to have a single thread per CPU (managing all devices on that CPU) + * Alan D. Brunelle - January 2009 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -18,45 +22,314 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ -#include -#include -#include + +#include +#include +#include +#include +#include +#include +#include +#include #include -#include +#include #include -#include -#include +#include +#include #include +#include +#include +#include +#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include - +#include +#include +#include +#include +#include +#include +#include + +#include "btt/list.h" #include "blktrace.h" -static char blktrace_version[] = "0.99"; - /* * You may want to increase this even more, if you are logging at a high * rate and see skipped/missed events */ -#define BUF_SIZE (512 * 1024) -#define BUF_NR (4) +#define BUF_SIZE (512 * 1024) +#define BUF_NR (4) + +#define FILE_VBUF_SIZE (128 * 1024) + +#define DEBUGFS_TYPE (0x64626720) +#define TRACE_NET_PORT (8462) + +enum { + Net_none = 0, + Net_server, + Net_client, +}; + +enum thread_status { + Th_running, + Th_leaving, + Th_error +}; + +/* + * Generic stats collected: nevents can be _roughly_ estimated by data_read + * (discounting pdu...) + * + * These fields are updated w/ pdc_dr_update & pdc_nev_update below. 
+ */ +struct pdc_stats { + unsigned long long data_read; + unsigned long long nevents; +}; + +struct devpath { + struct list_head head; + char *path; /* path to device special file */ + char *buts_name; /* name returned from bt kernel code */ + struct pdc_stats *stats; + int fd, ncpus; + unsigned long long drops; + + /* + * For piped output only: + * + * Each tracer will have a tracer_devpath_head that it will add new + * data onto. Its list is protected above (tracer_devpath_head.mutex) + * and it will signal the processing thread using the dp_cond, + * dp_mutex & dp_entries variables above. + */ + struct tracer_devpath_head *heads; + + /* + * For network server mode only: + */ + struct cl_host *ch; + u32 cl_id; + time_t cl_connect_time; + int setup_done; /* ioctl BLKTRACESETUP done */ + struct io_info *ios; +}; + +/* + * For piped output to stdout we will have each tracer thread (one per dev) + * tack buffers read from the relay queues on a per-device list. + * + * The main thread will then collect trace buffers from each of the lists in turn. + * + * We will use a mutex to guard each of the trace_buf lists. The tracers + * can then signal the main thread using dp_cond, dp_mutex and + * dp_entries. (When dp_entries is 0, and a tracer adds an entry it will + * signal. When dp_entries is 0, the main thread will wait for that condition + * to be signalled.) + * + * adb: It may be better just to have a large buffer per tracer per dev, + * and then use it as a ring-buffer. This would certainly cut down a lot + * of malloc/free thrashing, at the cost of more memory movements (potentially). 
+ */ +struct trace_buf { + struct list_head head; + struct devpath *dpp; + void *buf; + int cpu, len; +}; + +struct tracer_devpath_head { + pthread_mutex_t mutex; + struct list_head head; + struct trace_buf *prev; +}; + +/* + * Used to handle the mmap() interfaces for output file (containing traces) + */ +struct mmap_info { + void *fs_buf; + unsigned long long fs_size, fs_max_size, fs_off, fs_buf_len; + unsigned long buf_size, buf_nr; + int pagesize; +}; + +/* + * Each thread doing work on a (client) side of blktrace will have one + * of these. The ios array contains input/output information, pfds holds + * poll() data. The volatile fields provide flags to/from the main executing + * thread. + */ +struct tracer { + struct list_head head; + struct io_info *ios; + struct pollfd *pfds; + pthread_t thread; + int cpu, nios; + volatile int status, is_done; +}; + +/* + * networking stuff follows. we include a magic number so we know whether + * to endianness convert or not. + * + * The len field is overloaded: + * 0 - Indicates an "open" - allowing the server to set up for a dev/cpu + * 1 - Indicates a "close" - Shut down connection orderly + * + * The cpu field is overloaded on close: it will contain the number of drops. + */ +struct blktrace_net_hdr { + u32 magic; /* same as trace magic */ + char buts_name[32]; /* trace name */ + u32 cpu; /* for which cpu */ + u32 max_cpus; + u32 len; /* length of following trace data */ + u32 cl_id; /* id for set of client per-cpu connections */ + u32 buf_size; /* client buf_size for this trace */ + u32 buf_nr; /* client buf_nr for this trace */ + u32 page_size; /* client page_size for this trace */ +}; + +/* + * Each host encountered has one of these. The head is used to link this + * on to the network server's ch_list. Connections associated with this + * host are linked on conn_list, and any devices traced on that host + * are connected on the devpaths list. 
+ */ +struct cl_host { + struct list_head head; + struct list_head conn_list; + struct list_head devpaths; + struct net_server_s *ns; + char *hostname; + struct in_addr cl_in_addr; + int connects, ndevs, cl_opens; +}; + +/* + * Each connection (client to server socket ('fd')) has one of these. A + * back reference to the host ('ch'), and list headers (for the host + * list, and the network server conn_list) are also included. + */ +struct cl_conn { + struct list_head ch_head, ns_head; + struct cl_host *ch; + int fd, ncpus; + time_t connect_time; +}; + +/* + * The network server requires some poll structures to be maintained - + * one per connection currently on conn_list. The nchs/ch_list values + * are for each host connected to this server. The addr field is used + * for scratch as new connections are established. + */ +struct net_server_s { + struct list_head conn_list; + struct list_head ch_list; + struct pollfd *pfds; + int listen_fd, connects, nchs; + struct sockaddr_in addr; +}; + +/* + * This structure is (generically) used to provide information + * for a read-to-write set of values. + * + * ifn & ifd represent input information + * + * ofn, ofd, ofp, obuf & mmap_info are used for output file (optionally). 
+ */ +struct io_info { + struct devpath *dpp; + FILE *ofp; + char *obuf; + struct cl_conn *nc; /* Server network connection */ + + /* + * mmap controlled output files + */ + struct mmap_info mmap_info; + + /* + * Client network fields + */ + unsigned int ready; + unsigned long long data_queued; + + /* + * Input/output file descriptors & names + */ + int ifd, ofd; + char ifn[MAXPATHLEN + 64]; + char ofn[MAXPATHLEN + 64]; +}; + +static char blktrace_version[] = "2.0.0"; + +/* + * Linkage to blktrace helper routines (trace conversions) + */ +int data_is_native = -1; + +static int ndevs; +static int max_cpus; +static int ncpus; +static cpu_set_t *online_cpus; +static int pagesize; +static int act_mask = ~0U; +static int kill_running_trace; +static int stop_watch; +static int piped_output; + +static char *debugfs_path = "/sys/kernel/debug"; +static char *output_name; +static char *output_dir; + +static unsigned long buf_size = BUF_SIZE; +static unsigned long buf_nr = BUF_NR; + +static FILE *pfp; + +static LIST_HEAD(devpaths); +static LIST_HEAD(tracers); + +static volatile int done; + +/* + * tracer threads add entries, the main thread takes them off and processes + * them. These protect the dp_entries variable. + */ +static pthread_cond_t dp_cond = PTHREAD_COND_INITIALIZER; +static pthread_mutex_t dp_mutex = PTHREAD_MUTEX_INITIALIZER; +static volatile int dp_entries; -#define OFILE_BUF (128 * 1024) +/* + * These synchronize master / thread interactions. 
+ */ +static pthread_cond_t mt_cond = PTHREAD_COND_INITIALIZER; +static pthread_mutex_t mt_mutex = PTHREAD_MUTEX_INITIALIZER; +static volatile int nthreads_running; +static volatile int nthreads_leaving; +static volatile int nthreads_error; +static volatile int tracers_run; -#define RELAYFS_TYPE 0xF0B4A981 +/* + * network cmd line params + */ +static struct sockaddr_in hostname_addr; +static char hostname[MAXHOSTNAMELEN]; +static int net_port = TRACE_NET_PORT; +static int net_use_sendfile = 1; +static int net_mode; +static int *cl_fds; -#define RING_INIT_NR (2) -#define RING_MAX_NR (16UL) +static int (*handle_pfds)(struct tracer *, int, int); +static int (*handle_list)(struct tracer_devpath_head *, struct list_head *); -#define S_OPTS "d:a:A:r:o:kw:Vb:n:D:" +#define S_OPTS "d:a:A:r:o:kw:vVb:n:D:lh:p:sI:" static struct option l_opts[] = { { .name = "dev", @@ -64,6 +337,12 @@ static struct option l_opts[] = { .flag = NULL, .val = 'd' }, + { + .name = "input-devs", + .has_arg = required_argument, + .flag = NULL, + .val = 'I' + }, { .name = "act-mask", .has_arg = required_argument, @@ -100,6 +379,12 @@ static struct option l_opts[] = { .flag = NULL, .val = 'w' }, + { + .name = "version", + .has_arg = no_argument, + .flag = NULL, + .val = 'v' + }, { .name = "version", .has_arg = no_argument, @@ -124,301 +409,374 @@ static struct option l_opts[] = { .flag = NULL, .val = 'D' }, + { + .name = "listen", + .has_arg = no_argument, + .flag = NULL, + .val = 'l' + }, + { + .name = "host", + .has_arg = required_argument, + .flag = NULL, + .val = 'h' + }, + { + .name = "port", + .has_arg = required_argument, + .flag = NULL, + .val = 'p' + }, + { + .name = "no-sendfile", + .has_arg = no_argument, + .flag = NULL, + .val = 's' + }, { .name = NULL, } }; -struct thread_information { - int cpu; - pthread_t thread; - - int fd; - void *fd_buf; - unsigned long fd_off; - unsigned long fd_size; - unsigned long fd_max_size; - char fn[MAXPATHLEN + 64]; - - pthread_mutex_t *fd_lock; - FILE 
*ofile; - char *ofile_buffer; - - unsigned long events_processed; - struct device_information *device; -}; - -struct device_information { - int fd; - char *path; - char buts_name[32]; - volatile int trace_started; - unsigned long drop_count; - struct thread_information *threads; -}; - -static int ncpus; -static struct thread_information *thread_information; -static int ndevs; -static struct device_information *device_information; - -/* command line option globals */ -static char *relay_path; -static char *output_name; -static char *output_dir; -static int act_mask = ~0U; -static int kill_running_trace; -static unsigned long buf_size = BUF_SIZE; -static unsigned long buf_nr = BUF_NR; - -#define is_done() (*(volatile int *)(&done)) -static volatile int done; +static char usage_str[] = "\n\n" \ + "-d | --dev=\n" \ + "[ -r | --relay= ]\n" \ + "[ -o | --output=]\n" \ + "[ -D | --output-dir=\n" \ + "[ -w