X-Git-Url: https://git.kernel.dk/?a=blobdiff_plain;f=blktrace.c;h=d0d271f9add5106b4481321058787f1ba1aa82a4;hb=d025d6c67760a52e34e6c7352f33ad5dbbb4f6f4;hp=24b48d5b2a214ecc9d86222a6263366afbfbe2b7;hpb=e076d33bfa9a5ef334e8db6e143c624d15a7c30f;p=blktrace.git diff --git a/blktrace.c b/blktrace.c index 24b48d5..d0d271f 100644 --- a/blktrace.c +++ b/blktrace.c @@ -2,6 +2,10 @@ * block queue tracing application * * Copyright (C) 2005 Jens Axboe + * Copyright (C) 2006 Jens Axboe + * + * Rewrite to have a single thread per CPU (managing all devices on that CPU) + * Alan D. Brunelle - January 2009 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -18,48 +22,314 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ -#include -#include -#include + +#include +#include +#include +#include +#include +#include +#include +#include #include -#include +#include #include -#include -#include +#include +#include #include -#include -#include -#include +#include +#include +#include #include +#include +#include +#include #include -#include -#include -#include -#include -#include -#include #include #include #include #include +#include "btt/list.h" #include "blktrace.h" -#include "barrier.h" - -static char blktrace_version[] = "0.99"; /* * You may want to increase this even more, if you are logging at a high * rate and see skipped/missed events */ -#define BUF_SIZE (512 * 1024) -#define BUF_NR (4) +#define BUF_SIZE (512 * 1024) +#define BUF_NR (4) + +#define FILE_VBUF_SIZE (128 * 1024) + +#define DEBUGFS_TYPE (0x64626720) +#define TRACE_NET_PORT (8462) + +enum { + Net_none = 0, + Net_server, + Net_client, +}; + +enum thread_status { + Th_running, + Th_leaving, + Th_error +}; + +/* + * Generic stats collected: nevents can be _roughly_ estimated by data_read + * (discounting pdu...) + * + * These fields are updated w/ pdc_dr_update & pdc_nev_update below. 
+ */ +struct pdc_stats { + unsigned long long data_read; + unsigned long long nevents; +}; + +struct devpath { + struct list_head head; + char *path; /* path to device special file */ + char *buts_name; /* name returned from bt kernel code */ + struct pdc_stats *stats; + int fd, ncpus; + unsigned long long drops; + + /* + * For piped output only: + * + * Each tracer will have a tracer_devpath_head that it will add new + * data onto. Its list is protected above (tracer_devpath_head.mutex) + * and it will signal the processing thread using the dp_cond, + * dp_mutex & dp_entries variables above. + */ + struct tracer_devpath_head *heads; + + /* + * For network server mode only: + */ + struct cl_host *ch; + u32 cl_id; + time_t cl_connect_time; + int setup_done; /* ioctl BLKTRACESETUP done */ + struct io_info *ios; +}; + +/* + * For piped output to stdout we will have each tracer thread (one per dev) + * tack buffers read from the relay queues on a per-device list. + * + * The main thread will then collect trace buffers from each of the lists in turn. + * + * We will use a mutex to guard each of the trace_buf lists. The tracers + * can then signal the main thread using <dp_cond,dp_mutex> and + * dp_entries. (When dp_entries is 0, and a tracer adds an entry it will + * signal. When dp_entries is 0, the main thread will wait for that condition + * to be signalled.) + * + * adb: It may be better just to have a large buffer per tracer per dev, + * and then use it as a ring-buffer. This would certainly cut down a lot + * of malloc/free thrashing, at the cost of more memory movements (potentially). 
+ */ +struct trace_buf { + struct list_head head; + struct devpath *dpp; + void *buf; + int cpu, len; +}; + +struct tracer_devpath_head { + pthread_mutex_t mutex; + struct list_head head; + struct trace_buf *prev; +}; + +/* + * Used to handle the mmap() interfaces for output file (containing traces) + */ +struct mmap_info { + void *fs_buf; + unsigned long long fs_size, fs_max_size, fs_off, fs_buf_len; + unsigned long buf_size, buf_nr; + int pagesize; +}; + +/* + * Each thread doing work on a (client) side of blktrace will have one + * of these. The ios array contains input/output information, pfds holds + * poll() data. The volatile fields provide flags to/from the main executing + * thread. + */ +struct tracer { + struct list_head head; + struct io_info *ios; + struct pollfd *pfds; + pthread_t thread; + int cpu, nios; + volatile int status, is_done; +}; + +/* + * Networking stuff follows. We include a magic number so we know whether + * to endianness convert or not. + * + * The len field is overloaded: + * 0 - Indicates an "open" - allowing the server to set up for a dev/cpu + * 1 - Indicates a "close" - Shut down connection orderly + * + * The cpu field is overloaded on close: it will contain the number of drops. + */ +struct blktrace_net_hdr { + u32 magic; /* same as trace magic */ + char buts_name[32]; /* trace name */ + u32 cpu; /* for which cpu */ + u32 max_cpus; + u32 len; /* length of following trace data */ + u32 cl_id; /* id for set of client per-cpu connections */ + u32 buf_size; /* client buf_size for this trace */ + u32 buf_nr; /* client buf_nr for this trace */ + u32 page_size; /* client page_size for this trace */ +}; + +/* + * Each host encountered has one of these. The head is used to link this + * on to the network server's ch_list. Connections associated with this + * host are linked on conn_list, and any devices traced on that host + * are connected on the devpaths list. 
+ */ +struct cl_host { + struct list_head head; + struct list_head conn_list; + struct list_head devpaths; + struct net_server_s *ns; + char *hostname; + struct in_addr cl_in_addr; + int connects, ndevs, cl_opens; +}; + +/* + * Each connection (client to server socket ('fd')) has one of these. A + * back reference to the host ('ch'), and list headers (for the host + * list, and the network server conn_list) are also included. + */ +struct cl_conn { + struct list_head ch_head, ns_head; + struct cl_host *ch; + int fd, ncpus; + time_t connect_time; +}; + +/* + * The network server requires some poll structures to be maintained - + * one per connection currently on conn_list. The nchs/ch_list values + * are for each host connected to this server. The addr field is used + * for scratch as new connections are established. + */ +struct net_server_s { + struct list_head conn_list; + struct list_head ch_list; + struct pollfd *pfds; + int listen_fd, connects, nchs; + struct sockaddr_in addr; +}; + +/* + * This structure is (generically) used to provide information + * for a read-to-write set of values. + * + * ifn & ifd represent input information + * + * ofn, ofd, ofp, obuf & mmap_info are used for output file (optionally). 
+ */ +struct io_info { + struct devpath *dpp; + FILE *ofp; + char *obuf; + struct cl_conn *nc; /* Server network connection */ + + /* + * mmap controlled output files + */ + struct mmap_info mmap_info; + + /* + * Client network fields + */ + unsigned int ready; + unsigned long long data_queued; + + /* + * Input/output file descriptors & names + */ + int ifd, ofd; + char ifn[MAXPATHLEN + 64]; + char ofn[MAXPATHLEN + 64]; +}; + +static char blktrace_version[] = "2.0.0"; + +/* + * Linkage to blktrace helper routines (trace conversions) + */ +int data_is_native = -1; + +static int ndevs; +static int max_cpus; +static int ncpus; +static cpu_set_t *online_cpus; +static int pagesize; +static int act_mask = ~0U; +static int kill_running_trace; +static int stop_watch; +static int piped_output; + +static char *debugfs_path = "/sys/kernel/debug"; +static char *output_name; +static char *output_dir; + +static unsigned long buf_size = BUF_SIZE; +static unsigned long buf_nr = BUF_NR; + +static FILE *pfp; + +static LIST_HEAD(devpaths); +static LIST_HEAD(tracers); + +static volatile int done; + +/* + * tracer threads add entries, the main thread takes them off and processes + * them. These protect the dp_entries variable. + */ +static pthread_cond_t dp_cond = PTHREAD_COND_INITIALIZER; +static pthread_mutex_t dp_mutex = PTHREAD_MUTEX_INITIALIZER; +static volatile int dp_entries; + +/* + * These synchronize master / thread interactions. 
+ */ +static pthread_cond_t mt_cond = PTHREAD_COND_INITIALIZER; +static pthread_mutex_t mt_mutex = PTHREAD_MUTEX_INITIALIZER; +static volatile int nthreads_running; +static volatile int nthreads_leaving; +static volatile int nthreads_error; +static volatile int tracers_run; -#define OFILE_BUF (128 * 1024) +/* + * network cmd line params + */ +static struct sockaddr_in hostname_addr; +static char hostname[MAXHOSTNAMELEN]; +static int net_port = TRACE_NET_PORT; +static int net_use_sendfile = 1; +static int net_mode; +static int *cl_fds; -#define RELAYFS_TYPE 0xF0B4A981 +static int (*handle_pfds)(struct tracer *, int, int); +static int (*handle_list)(struct tracer_devpath_head *, struct list_head *); -#define S_OPTS "d:a:A:r:o:kw:Vb:n:D:lh:p:s" +#define S_OPTS "d:a:A:r:o:kw:vVb:n:D:lh:p:sI:" static struct option l_opts[] = { { .name = "dev", @@ -67,6 +337,12 @@ static struct option l_opts[] = { .flag = NULL, .val = 'd' }, + { + .name = "input-devs", + .has_arg = required_argument, + .flag = NULL, + .val = 'I' + }, { .name = "act-mask", .has_arg = required_argument, @@ -103,6 +379,12 @@ static struct option l_opts[] = { .flag = NULL, .val = 'w' }, + { + .name = "version", + .has_arg = no_argument, + .flag = NULL, + .val = 'v' + }, { .name = "version", .has_arg = no_argument, @@ -146,7 +428,7 @@ static struct option l_opts[] = { .val = 'p' }, { - .name = "sendfile", + .name = "no-sendfile", .has_arg = no_argument, .flag = NULL, .val = 's' @@ -156,1429 +438,1672 @@ static struct option l_opts[] = { } }; -struct tip_subbuf { - void *buf; - unsigned int len; - unsigned int max_len; -}; +static char usage_str[] = "\n\n" \ + "-d | --dev=\n" \ + "[ -r | --relay= ]\n" \ + "[ -o | --output=]\n" \ + "[ -D | --output-dir=\n" \ + "[ -w