* block queue tracing application
*
* Copyright (C) 2005 Jens Axboe <axboe@suse.de>
+ * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
#include "blktrace.h"
#include "barrier.h"
-static char blktrace_version[] = "0.99";
+static char blktrace_version[] = "0.99.3";
/*
* You may want to increase this even more, if you are logging at a high
#define OFILE_BUF (128 * 1024)
-#define RELAYFS_TYPE 0xF0B4A981
+#define DEBUGFS_TYPE 0x64626720
-#define S_OPTS "d:a:A:r:o:kw:Vb:n:D:lh:p:s"
+#define S_OPTS "d:a:A:r:o:kw:Vb:n:D:lh:p:sI:"
static struct option l_opts[] = {
{
.name = "dev",
.flag = NULL,
.val = 'd'
},
+ {
+ .name = "input-devs",
+ .has_arg = required_argument,
+ .flag = NULL,
+ .val = 'I'
+ },
{
.name = "act-mask",
.has_arg = required_argument,
.val = 'p'
},
{
- .name = "sendfile",
+ .name = "no-sendfile",
.has_arg = no_argument,
.flag = NULL,
.val = 's'
void *buf;
unsigned int len;
unsigned int max_len;
- off_t offset;
};
#define FIFO_SIZE (1024) /* should be plenty big! */
void *fd_buf;
char fn[MAXPATHLEN + 64];
- int pfd;
- size_t *pfd_buf;
-
- struct in_addr cl_in_addr;
-
FILE *ofile;
char *ofile_buffer;
off_t ofile_offset;
unsigned long events_processed;
unsigned long long data_read;
+ unsigned long long data_queued;
struct device_information *device;
int exited;
unsigned long fs_off;
void *fs_buf;
unsigned long fs_buf_len;
+
+ struct net_connection *nc;
};
struct device_information {
volatile int trace_started;
unsigned long drop_count;
struct thread_information *threads;
+ unsigned long buf_size;
+ unsigned long buf_nr;
+ unsigned int page_size;
+
+ struct cl_host *ch;
+ u32 cl_id;
+ time_t cl_connect_time;
};
static int ncpus;
static struct device_information *device_information;
/* command line option globals */
-static char *relay_path;
+static char *debugfs_path;
static char *output_name;
static char *output_dir;
static int act_mask = ~0U;
#define dip_tracing(dip) (*(volatile int *)(&(dip)->trace_started))
#define dip_set_tracing(dip, v) ((dip)->trace_started = (v))
-#define __for_each_dip(__d, __i, __e) \
- for (__i = 0, __d = device_information; __i < __e; __i++, __d++)
+#define __for_each_dip(__d, __di, __e, __i) \
+ for (__i = 0, __d = __di; __i < __e; __i++, __d++)
+
+#define for_each_dip(__d, __i) \
+ __for_each_dip(__d, device_information, ndevs, __i)
+#define for_each_nc_dip(__nc, __d, __i) \
+ __for_each_dip(__d, (__nc)->ch->device_information, (__nc)->ch->ndevs, __i)
-#define for_each_dip(__d, __i) __for_each_dip(__d, __i, ndevs)
+#define __for_each_tip(__d, __t, __ncpus, __j) \
+ for (__j = 0, __t = (__d)->threads; __j < __ncpus; __j++, __t++)
#define for_each_tip(__d, __t, __j) \
- for (__j = 0, __t = (__d)->threads; __j < ncpus; __j++, __t++)
+ __for_each_tip(__d, __t, ncpus, __j)
+#define for_each_cl_host(__c) \
+ for (__c = cl_host_list; __c; __c = __c->list_next)
/*
* networking stuff follows. we include a magic number so we know whether
u32 cpu; /* for which cpu */
u32 max_cpus;
u32 len; /* length of following trace data */
+ u32 cl_id; /* id for set of client per-cpu connections */
+ u32 buf_size; /* client buf_size for this trace */
+ u32 buf_nr; /* client buf_nr for this trace */
+ u32 page_size; /* client page_size for this trace */
};
#define TRACE_NET_PORT (8462)
static char hostname[MAXHOSTNAMELEN];
static int net_port = TRACE_NET_PORT;
static int net_mode = 0;
-static int net_use_sendfile;
+static int net_use_sendfile = 1;
-static int net_in_fd = -1;
-static int net_out_fd = -1;
+struct cl_host {
+ struct cl_host *list_next;
+ struct in_addr cl_in_addr;
+ struct net_connection *net_connections;
+ int nconn;
+ struct device_information *device_information;
+ int ndevs;
+ int ncpus;
+ int ndevs_done;
+};
+
+struct net_connection {
+ int in_fd;
+ struct pollfd pfd;
+ time_t connect_time;
+ struct cl_host *ch;
+ int ncpus;
+};
+
+#define NET_MAX_CL_HOSTS (1024)
+static struct cl_host *cl_host_list;
+static int cl_hosts;
+static int net_connects;
+
+static int *net_out_fd;
static void handle_sigint(__attribute__((__unused__)) int sig)
{
char tmp[MAXPATHLEN + 64];
snprintf(tmp, sizeof(tmp), "%s/block/%s/dropped",
- relay_path, buts_name);
+ debugfs_path, buts_name);
fd = open(tmp, O_RDONLY);
if (fd < 0) {
return atoi(tmp);
}
-static size_t get_subbuf_padding(struct thread_information *tip,
- unsigned subbuf)
-{
- size_t padding_size = buf_nr * sizeof(size_t);
- size_t ret;
-
- if (read(tip->pfd, tip->pfd_buf, padding_size) < 0) {
- perror("tip pad read");
- ret = -1;
- } else
- ret = tip->pfd_buf[subbuf];
-
- return ret;
-}
-
static int start_trace(struct device_information *dip)
{
struct blk_user_trace_setup buts;
memset(&buts, 0, sizeof(buts));
- buts.buf_size = buf_size;
- buts.buf_nr = buf_nr;
+ buts.buf_size = dip->buf_size;
+ buts.buf_nr = dip->buf_nr;
buts.act_mask = act_mask;
if (ioctl(dip->fd, BLKTRACESETUP, &buts) < 0) {
}
}
-static void wait_for_data(struct thread_information *tip)
+static void wait_for_data(struct thread_information *tip, int timeout)
{
struct pollfd pfd = { .fd = tip->fd, .events = POLLIN };
- do {
- poll(&pfd, 1, 100);
+ while (!is_done()) {
+ if (poll(&pfd, 1, timeout) < 0) {
+ perror("poll");
+ break;
+ }
if (pfd.revents & POLLIN)
break;
if (tip->ofile_stdout)
break;
- } while (!is_done());
+ }
}
static int read_data_file(struct thread_information *tip, void *buf,
int ret = 0;
do {
- wait_for_data(tip);
+ wait_for_data(tip, 100);
ret = read(tip->fd, buf, len);
if (!ret)
static int read_data_net(struct thread_information *tip, void *buf,
unsigned int len)
{
+ struct net_connection *nc = tip->nc;
unsigned int bytes_left = len;
int ret = 0;
do {
- ret = recv(net_in_fd, buf, bytes_left, MSG_WAITALL);
+ ret = recv(nc->in_fd, buf, bytes_left, MSG_WAITALL);
if (!ret)
continue;
return len - bytes_left;
}
-static int read_data(struct thread_information *tip, void *buf,
- unsigned int len)
-{
- return tip->read_data(tip, buf, len);
-}
-
static inline struct tip_subbuf *
subbuf_fifo_dequeue(struct thread_information *tip)
{
{
int ofd = fileno(tip->ofile);
int ret;
+ unsigned long nr;
/*
* extend file, if we have to. use chunks of 16 subbuffers.
*/
- if (tip->fs_off + buf_size > tip->fs_buf_len) {
+ if (tip->fs_off + maxlen > tip->fs_buf_len) {
if (tip->fs_buf) {
munlock(tip->fs_buf, tip->fs_buf_len);
munmap(tip->fs_buf, tip->fs_buf_len);
tip->fs_buf = NULL;
}
- tip->fs_off = tip->fs_size & (page_size - 1);
- tip->fs_buf_len = (16 * buf_size) - tip->fs_off;
+ tip->fs_off = tip->fs_size & (tip->device->page_size - 1);
+ nr = max(16, tip->device->buf_nr);
+ tip->fs_buf_len = (nr * tip->device->buf_size) - tip->fs_off;
tip->fs_max_size += tip->fs_buf_len;
if (ftruncate(ofd, tip->fs_max_size) < 0) {
mlock(tip->fs_buf, tip->fs_buf_len);
}
- ret = read_data(tip, tip->fs_buf + tip->fs_off, maxlen);
+ ret = tip->read_data(tip, tip->fs_buf + tip->fs_off, maxlen);
if (ret >= 0) {
tip->data_read += ret;
tip->fs_size += ret;
struct tip_subbuf *ts = malloc(sizeof(*ts));
int ret;
- ts->buf = malloc(buf_size);
+ ts->buf = malloc(tip->device->buf_size);
ts->max_len = maxlen;
- ret = read_data(tip, ts->buf, ts->max_len);
+ ret = tip->read_data(tip, ts->buf, ts->max_len);
if (ret > 0) {
ts->len = ret;
tip->data_read += ret;
if (subbuf_fifo_queue(tip, ts))
- return -1;
+ ret = -1;
}
- return ret;
-}
-
-static int get_subbuf_sendfile(struct thread_information *tip,
- unsigned int maxlen)
-{
- struct tip_subbuf *ts;
- struct stat sb;
- unsigned int ready, this_size, total;
-
- wait_for_data(tip);
-
- /*
- * hack to get last data out, we can't use sendfile for that
- */
- if (is_done())
- return get_subbuf(tip, maxlen);
-
- if (fstat(tip->fd, &sb) < 0) {
- perror("trace stat");
- return -1;
- }
-
- ready = sb.st_size - tip->ofile_offset;
- if (!ready) {
- /*
- * delay a little, since poll() will return data available
- * until sendfile() is run
- */
- usleep(100);
- return 0;
- }
-
- this_size = buf_size;
- total = ready;
- while (ready) {
- if (this_size > ready)
- this_size = ready;
-
- ts = malloc(sizeof(*ts));
-
- ts->buf = NULL;
- ts->max_len = 0;
-
- ts->len = this_size;
- ts->offset = tip->ofile_offset;
- tip->ofile_offset += ts->len;
-
- if (subbuf_fifo_queue(tip, ts))
- return -1;
-
- ready -= this_size;
+ if (ret <= 0) {
+ free(ts->buf);
+ free(ts);
}
- return total;
+ return ret;
}
static void close_thread(struct thread_information *tip)
{
if (tip->fd != -1)
close(tip->fd);
- if (tip->pfd != -1)
- close(tip->pfd);
if (tip->ofile)
fclose(tip->ofile);
if (tip->ofile_buffer)
free(tip->ofile_buffer);
if (tip->fd_buf)
free(tip->fd_buf);
- if (tip->pfd_buf)
- free(tip->pfd_buf);
tip->fd = -1;
- tip->pfd = -1;
tip->ofile = NULL;
tip->ofile_buffer = NULL;
tip->fd_buf = NULL;
/*
* truncate to right size and cleanup mmap
*/
- if (tip->ofile_mmap) {
+ if (tip->ofile_mmap && tip->ofile) {
int ofd = fileno(tip->ofile);
if (tip->fs_buf)
}
snprintf(tip->fn, sizeof(tip->fn), "%s/block/%s/trace%d",
- relay_path, tip->device->buts_name, tip->cpu);
+ debugfs_path, tip->device->buts_name, tip->cpu);
tip->fd = open(tip->fn, O_RDONLY);
if (tip->fd < 0) {
perror(tip->fn);
exit_trace(1);
}
- if (net_mode == Net_client && net_use_sendfile) {
- char tmp[MAXPATHLEN + 64];
-
- snprintf(tmp, sizeof(tmp), "%s/block/%s/trace%d.padding",
- relay_path, tip->device->buts_name, tip->cpu);
-
- tip->pfd = open(tmp, O_RDONLY);
- if (tip->pfd < 0) {
- fprintf(stderr, "Couldn't open padding file %s\n", tmp);
- exit_trace(1);
- }
-
- tip->pfd_buf = malloc(buf_nr * sizeof(size_t));
- }
-
while (!is_done()) {
- if (tip->get_subbuf(tip, buf_size) < 0)
+ if (tip->get_subbuf(tip, tip->device->buf_size) < 0)
break;
}
/*
* trace is stopped, pull data until we get a short read
*/
- while (tip->get_subbuf(tip, buf_size) > 0)
+ while (tip->get_subbuf(tip, tip->device->buf_size) > 0)
;
tip_ftrunc_final(tip);
hdr.cpu = tip->cpu;
hdr.max_cpus = ncpus;
hdr.len = len;
-
- return write_data_net(net_out_fd, &hdr, sizeof(hdr));
+ hdr.cl_id = getpid();
+ hdr.buf_size = tip->device->buf_size;
+ hdr.buf_nr = tip->device->buf_nr;
+ hdr.page_size = tip->device->page_size;
+
+ return write_data_net(net_out_fd[tip->cpu], &hdr, sizeof(hdr));
}
/*
*/
static void net_client_send_close(void)
{
+ struct device_information *dip;
struct blktrace_net_hdr hdr;
+ int i;
- hdr.magic = BLK_IO_TRACE_MAGIC;
- hdr.cpu = 0;
- hdr.max_cpus = ncpus;
- hdr.len = 0;
+ for_each_dip(dip, i) {
+ hdr.magic = BLK_IO_TRACE_MAGIC;
+ hdr.max_cpus = ncpus;
+ hdr.len = 0;
+ strcpy(hdr.buts_name, dip->buts_name);
+ hdr.cpu = get_dropped_count(dip->buts_name);
+ hdr.cl_id = getpid();
+ hdr.buf_size = dip->buf_size;
+ hdr.buf_nr = dip->buf_nr;
+ hdr.page_size = dip->page_size;
+
+ write_data_net(net_out_fd[0], &hdr, sizeof(hdr));
+ }
- write_data_net(net_out_fd, &hdr, sizeof(hdr));
}
static int flush_subbuf_net(struct thread_information *tip,
struct tip_subbuf *ts)
{
if (net_send_header(tip, ts->len))
- return 1;
- if (write_data_net(net_out_fd, ts->buf, ts->len))
- return 1;
+ return -1;
+ if (write_data_net(net_out_fd[tip->cpu], ts->buf, ts->len))
+ return -1;
free(ts->buf);
free(ts);
- return 0;
+ return 1;
}
static int net_sendfile(struct thread_information *tip, struct tip_subbuf *ts)
{
- unsigned int bytes_left = ts->len;
- int ret;
+ int ret = sendfile(net_out_fd[tip->cpu], tip->fd, NULL, ts->len);
- while (bytes_left && is_done()) {
- ret = sendfile(net_out_fd, tip->fd, &ts->offset, bytes_left);
- if (ret < 0) {
- perror("sendfile");
- break;
- } else if (!ret) {
- usleep(100);
- continue;
- }
-
- ts->offset += ret;
- bytes_left -= ret;
+ if (ret < 0) {
+ perror("sendfile");
+ return 1;
+ } else if (ret < (int) ts->len) {
+ fprintf(stderr, "short sendfile send (%d of %d)\n", ret, ts->len);
+ return 1;
}
- return bytes_left;
+ return 0;
}
static int flush_subbuf_sendfile(struct thread_information *tip,
struct tip_subbuf *ts)
{
- size_t padding;
- unsigned subbuf;
-
- /*
- * currently we cannot use sendfile() on the last bytes read, as they
- * may not be a full subbuffer. get_subbuf_sendfile() falls back to
- * the read approach for those, so use send() to ship them out
- */
- if (ts->buf)
- return flush_subbuf_net(tip, ts);
-
- subbuf = (ts->offset / buf_size) % buf_nr;
- padding = get_subbuf_padding(tip, subbuf);
- ts->len -= padding;
+ int ret = -1;
if (net_send_header(tip, ts->len))
- return 1;
+ goto err;
if (net_sendfile(tip, ts))
- return 1;
+ goto err;
tip->data_read += ts->len;
+ ret = 1;
+err:
free(ts);
- return 0;
+ return ret;
+}
+
+static int get_subbuf_sendfile(struct thread_information *tip,
+ __attribute__((__unused__)) unsigned int maxlen)
+{
+ struct tip_subbuf *ts;
+ struct stat sb;
+ unsigned int ready;
+
+ wait_for_data(tip, -1);
+
+ if (fstat(tip->fd, &sb) < 0) {
+ perror("trace stat");
+ return -1;
+ }
+
+ ready = sb.st_size - tip->data_queued;
+ if (!ready) {
+ usleep(1000);
+ return 0;
+ }
+
+ ts = malloc(sizeof(*ts));
+ ts->buf = NULL;
+ ts->max_len = 0;
+ ts->len = ready;
+ tip->data_queued += ready;
+
+ if (flush_subbuf_sendfile(tip, ts) < 0)
+ return -1;
+
+ return ready;
}
static int write_data(struct thread_information *tip, void *buf,
if (!buf_len)
return 0;
- while (1) {
- ret = fwrite(buf, buf_len, 1, tip->ofile);
- if (ret == 1)
- break;
-
- if (ret < 0) {
- perror("write");
- return 1;
- }
+ ret = fwrite(buf, buf_len, 1, tip->ofile);
+ if (ferror(tip->ofile) || ret != 1) {
+ perror("fwrite");
+ clearerr(tip->ofile);
+ return 1;
}
if (tip->ofile_stdout)
}
if (!events)
- usleep(10);
+ usleep(100000);
}
/*
* for files, we just wait around for trace threads to exit
*/
if ((output_name && !strcmp(output_name, "-")) ||
- net_mode == Net_client)
+ ((net_mode == Net_client) && !net_use_sendfile))
get_and_write_events();
else {
struct device_information *dip;
do {
tips_running = 0;
- usleep(1000);
+ usleep(100000);
for_each_dip(dip, i)
for_each_tip(dip, tip, j)
net_client_send_close();
}
-static int fill_ofname(struct thread_information *tip, char *dst,
+static int fill_ofname(struct device_information *dip,
+ struct thread_information *tip, char *dst,
char *buts_name)
{
struct stat sb;
int len = 0;
- time_t t;
if (output_dir)
len = sprintf(dst, "%s/", output_dir);
+ else
+ len = sprintf(dst, "./");
if (net_mode == Net_server) {
- len += sprintf(dst + len, "%s-", inet_ntoa(tip->cl_in_addr));
- time(&t);
- len += strftime(dst + len, 64, "%F-%T/", gmtime(&t));
+ struct net_connection *nc = tip->nc;
+
+ len += sprintf(dst + len, "%s-", inet_ntoa(nc->ch->cl_in_addr));
+ len += strftime(dst + len, 64, "%F-%T/", gmtime(&dip->cl_connect_time));
}
if (stat(dst, &sb) < 0) {
if (net_mode == Net_client) {
if (net_use_sendfile) {
tip->get_subbuf = get_subbuf_sendfile;
- tip->flush_subbuf = flush_subbuf_sendfile;
+ tip->flush_subbuf = NULL;
} else {
tip->get_subbuf = get_subbuf;
tip->flush_subbuf = flush_subbuf_net;
mode = _IOLBF;
vbuf_size = 512;
} else {
- if (fill_ofname(tip, op, dip->buts_name))
+ if (fill_ofname(dip, tip, op, dip->buts_name))
return 1;
tip->ofile = fopen(op, "w+");
tip->ofile_stdout = 0;
tip->device = dip;
tip->events_processed = 0;
tip->fd = -1;
- tip->pfd = -1;
memset(&tip->fifo, 0, sizeof(tip->fifo));
tip->leftover_ts = NULL;
perror(dip->path);
return 1;
}
+ dip->buf_size = buf_size;
+ dip->buf_nr = buf_nr;
+ dip->page_size = page_size;
}
return 0;
fprintf(stderr, "Out of memory, threads (%d)\n", size * ndevs);
return 1;
}
+ memset(thread_information, 0, size * ndevs);
for_each_dip(dip, i) {
if (start_trace(dip)) {
}
if (i != ndevs) {
- __for_each_dip(dip, j, i)
+ __for_each_dip(dip, device_information, i, j)
stop_trace(dip);
return 1;
}
if (i != ndevs) {
- __for_each_dip(dip, j, i)
+ __for_each_dip(dip, device_information, i, j)
stop_threads(dip);
for_each_dip(dip, i)
stop_trace(dip);
return 0;
}
-static void show_stats(void)
+static void show_stats(struct device_information *dips, int ndips, int cpus)
{
struct device_information *dip;
struct thread_information *tip;
stat_shown = 1;
total_drops = 0;
- for_each_dip(dip, i) {
+ __for_each_dip(dip, dips, ndips, i) {
if (!no_stdout)
printf("Device: %s\n", dip->path);
events_processed = 0;
data_read = 0;
- for_each_tip(dip, tip, j) {
+ __for_each_tip(dip, tip, cpus, j) {
if (!no_stdout)
printf(" CPU%3d: %20lu events, %8llu KiB data\n",
tip->cpu, tip->events_processed,
fprintf(stderr, "You have dropped events, consider using a larger buffer size (-b)\n");
}
-static struct device_information *net_get_dip(char *buts_name,
- struct in_addr *cl_in_addr)
+static struct device_information *net_get_dip(struct net_connection *nc,
+ struct blktrace_net_hdr *bnh)
{
- struct device_information *dip;
+ struct device_information *dip, *cl_dip = NULL;
+ struct cl_host *ch = nc->ch;
int i;
- for (i = 0; i < ndevs; i++) {
- dip = &device_information[i];
+ for (i = 0; i < ch->ndevs; i++) {
+ dip = &ch->device_information[i];
- if (!strcmp(dip->buts_name, buts_name))
+ if (!strcmp(dip->buts_name, bnh->buts_name))
return dip;
+
+ if (dip->cl_id == bnh->cl_id)
+ cl_dip = dip;
}
- device_information = realloc(device_information, (ndevs + 1) * sizeof(*dip));
- dip = &device_information[ndevs];
+ ch->device_information = realloc(ch->device_information, (ch->ndevs + 1) * sizeof(*dip));
+ dip = &ch->device_information[ch->ndevs];
memset(dip, 0, sizeof(*dip));
dip->fd = -1;
- strcpy(dip->buts_name, buts_name);
- dip->path = strdup(buts_name);
- ndevs++;
- dip->threads = malloc(ncpus * sizeof(struct thread_information));
- memset(dip->threads, 0, ncpus * sizeof(struct thread_information));
+ dip->ch = ch;
+ dip->cl_id = bnh->cl_id;
+ dip->buf_size = bnh->buf_size;
+ dip->buf_nr = bnh->buf_nr;
+ dip->page_size = bnh->page_size;
+
+ if (cl_dip)
+ dip->cl_connect_time = cl_dip->cl_connect_time;
+ else
+ dip->cl_connect_time = nc->connect_time;
+ strcpy(dip->buts_name, bnh->buts_name);
+ dip->path = strdup(bnh->buts_name);
+ dip->trace_started = 1;
+ ch->ndevs++;
+ dip->threads = malloc(nc->ncpus * sizeof(struct thread_information));
+ memset(dip->threads, 0, nc->ncpus * sizeof(struct thread_information));
/*
* open all files
*/
- for (i = 0; i < ncpus; i++) {
+ for (i = 0; i < nc->ncpus; i++) {
struct thread_information *tip = &dip->threads[i];
tip->cpu = i;
tip->device = dip;
tip->fd = -1;
- tip->pfd = -1;
- tip->cl_in_addr = *cl_in_addr;
-
+ tip->nc = nc;
+
if (tip_open_output(dip, tip))
return NULL;
+
+ tip->nc = NULL;
}
return dip;
}
-static struct thread_information *net_get_tip(struct blktrace_net_hdr *bnh,
- struct in_addr *cl_in_addr)
+static struct thread_information *net_get_tip(struct net_connection *nc,
+ struct blktrace_net_hdr *bnh)
{
struct device_information *dip;
+ struct thread_information *tip;
- ncpus = bnh->max_cpus;
- dip = net_get_dip(bnh->buts_name, cl_in_addr);
- return &dip->threads[bnh->cpu];
+ dip = net_get_dip(nc, bnh);
+ if (!dip->trace_started) {
+ fprintf(stderr, "Events for closed devices %s\n", dip->buts_name);
+ return NULL;
+ }
+
+ tip = &dip->threads[bnh->cpu];
+ if (!tip->nc)
+ tip->nc = nc;
+
+ return tip;
}
-static int net_get_header(struct blktrace_net_hdr *bnh)
+static int net_get_header(struct net_connection *nc,
+ struct blktrace_net_hdr *bnh)
{
- int fl = fcntl(net_in_fd, F_GETFL);
+ int fl = fcntl(nc->in_fd, F_GETFL);
int bytes_left, ret;
void *p = bnh;
- fcntl(net_in_fd, F_SETFL, fl | O_NONBLOCK);
+ fcntl(nc->in_fd, F_SETFL, fl | O_NONBLOCK);
bytes_left = sizeof(*bnh);
while (bytes_left && !is_done()) {
- ret = recv(net_in_fd, p, bytes_left, MSG_WAITALL);
+ ret = recv(nc->in_fd, p, bytes_left, MSG_WAITALL);
if (ret < 0) {
if (errno != EAGAIN) {
perror("recv header");
return 1;
}
- usleep(100);
+ usleep(1000);
continue;
} else if (!ret) {
- usleep(100);
+ usleep(1000);
continue;
} else {
p += ret;
bytes_left -= ret;
}
}
- fcntl(net_in_fd, F_SETFL, fl & ~O_NONBLOCK);
+ fcntl(nc->in_fd, F_SETFL, fl & ~O_NONBLOCK);
return bytes_left;
}
-static int net_server_loop(struct in_addr *cl_in_addr)
+/*
+ * finalize a net client: truncate files, show stats, cleanup, etc
+ */
+static void device_done(struct net_connection *nc, struct device_information *dip)
+{
+ struct thread_information *tip;
+ int i;
+
+ __for_each_tip(dip, tip, nc->ncpus, i)
+ tip_ftrunc_final(tip);
+
+ show_stats(dip, 1, nc->ncpus);
+
+ /*
+ * cleanup for next run
+ */
+ __for_each_tip(dip, tip, nc->ncpus, i) {
+ if (tip->ofile)
+ fclose(tip->ofile);
+ }
+
+ free(dip->threads);
+ free(dip->path);
+
+ close(nc->in_fd);
+ nc->in_fd = -1;
+
+ stat_shown = 0;
+}
+
+static inline int in_addr_eq(struct in_addr a, struct in_addr b)
+{
+ return a.s_addr == b.s_addr;
+}
+
+static void net_add_client_host(struct cl_host *ch)
+{
+ ch->list_next = cl_host_list;
+ cl_host_list = ch;
+ cl_hosts++;
+}
+
+static void net_remove_client_host(struct cl_host *ch)
+{
+ struct cl_host *p, *c;
+
+ for (p = c = cl_host_list; c; c = c->list_next) {
+ if (c == ch) {
+ if (p == c)
+ cl_host_list = c->list_next;
+ else
+ p->list_next = c->list_next;
+ cl_hosts--;
+ return;
+ }
+ p = c;
+ }
+}
+
+static struct cl_host *net_find_client_host(struct in_addr cl_in_addr)
+{
+ struct cl_host *ch = cl_host_list;
+
+ while (ch) {
+ if (in_addr_eq(ch->cl_in_addr, cl_in_addr))
+ return ch;
+ ch = ch->list_next;
+ }
+
+ return NULL;
+}
+
+static void net_client_host_done(struct cl_host *ch)
+{
+ free(ch->device_information);
+ free(ch->net_connections);
+ net_connects -= ch->nconn;
+ net_remove_client_host(ch);
+ free(ch);
+}
+
+/*
+ * handle incoming events from a net client
+ */
+static int net_client_data(struct net_connection *nc)
{
struct thread_information *tip;
struct blktrace_net_hdr bnh;
- if (net_get_header(&bnh))
+ if (net_get_header(nc, &bnh))
return 1;
if (data_is_native == -1 && check_data_endianness(bnh.magic)) {
if (!data_is_native) {
bnh.magic = be32_to_cpu(bnh.magic);
bnh.cpu = be32_to_cpu(bnh.cpu);
+ bnh.max_cpus = be32_to_cpu(bnh.max_cpus);
bnh.len = be32_to_cpu(bnh.len);
+ bnh.cl_id = be32_to_cpu(bnh.cl_id);
+ bnh.buf_size = be32_to_cpu(bnh.buf_size);
+ bnh.buf_nr = be32_to_cpu(bnh.buf_nr);
+ bnh.page_size = be32_to_cpu(bnh.page_size);
}
if ((bnh.magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) {
return 1;
}
+ if (nc->ncpus == -1)
+ nc->ncpus = bnh.max_cpus;
+
/*
* len == 0 means that the other end signalled end-of-run
*/
if (!bnh.len) {
- fprintf(stderr, "server: end of run\n");
- return 1;
+ /*
+ * overload cpu count with dropped events
+ */
+ struct device_information *dip;
+
+ dip = net_get_dip(nc, &bnh);
+ dip->drop_count = bnh.cpu;
+ dip->trace_started = 0;
+
+ printf("server: end of run for %s\n", dip->buts_name);
+
+ device_done(nc, dip);
+
+ if (++nc->ch->ndevs_done == nc->ch->ndevs)
+ net_client_host_done(nc->ch);
+
+ return 0;
}
- tip = net_get_tip(&bnh, cl_in_addr);
+ tip = net_get_tip(nc, &bnh);
if (!tip)
return 1;
return 0;
}
+static void net_add_connection(int listen_fd, struct sockaddr_in *addr)
+{
+ socklen_t socklen = sizeof(*addr);
+ struct net_connection *nc;
+ struct cl_host *ch;
+ int in_fd;
+
+ in_fd = accept(listen_fd, (struct sockaddr *) addr, &socklen);
+ if (in_fd < 0) {
+ perror("accept");
+ return;
+ }
+
+ ch = net_find_client_host(addr->sin_addr);
+ if (!ch) {
+ if (cl_hosts == NET_MAX_CL_HOSTS) {
+ fprintf(stderr, "server: no more clients allowed\n");
+ return;
+ }
+ ch = malloc(sizeof(struct cl_host));
+ memset(ch, 0, sizeof(*ch));
+ ch->cl_in_addr = addr->sin_addr;
+ net_add_client_host(ch);
+
+ printf("server: connection from %s\n", inet_ntoa(addr->sin_addr));
+ }
+
+ ch->net_connections = realloc(ch->net_connections, (ch->nconn + 1) * sizeof(*nc));
+ nc = &ch->net_connections[ch->nconn++];
+ memset(nc, 0, sizeof(*nc));
+
+ time(&nc->connect_time);
+ nc->ch = ch;
+ nc->in_fd = in_fd;
+ nc->ncpus = -1;
+ net_connects++;
+}
+
+/*
+ * event driven loop, handle new incoming connections and data from
+ * existing connections
+ */
+static void net_server_handle_connections(int listen_fd,
+ struct sockaddr_in *addr)
+{
+ struct pollfd *pfds = NULL;
+ struct net_connection **ncs = NULL;
+ int max_connects = 0;
+ int i, nconns, events;
+ struct cl_host *ch;
+ struct net_connection *nc;
+
+ printf("server: waiting for connections...\n");
+
+ while (!is_done()) {
+ if (net_connects >= max_connects) {
+ pfds = realloc(pfds, (net_connects + 1) * sizeof(*pfds));
+ ncs = realloc(ncs, (net_connects + 1) * sizeof(*ncs));
+ max_connects = net_connects + 1;
+ }
+ /*
+ * the zero entry is for incoming connections, remaining
+ * entries for clients
+ */
+ pfds[0].fd = listen_fd;
+ pfds[0].events = POLLIN;
+ nconns = 0;
+ for_each_cl_host(ch) {
+ for (i = 0; i < ch->nconn; i++) {
+ nc = &ch->net_connections[i];
+ pfds[nconns + 1].fd = nc->in_fd;
+ pfds[nconns + 1].events = POLLIN;
+ ncs[nconns++] = nc;
+ }
+ }
+
+ events = poll(pfds, 1 + nconns, -1);
+ if (events < 0) {
+ if (errno == EINTR)
+ continue;
+
+ perror("poll");
+ break;
+ } else if (!events)
+ continue;
+
+ if (pfds[0].revents & POLLIN) {
+ net_add_connection(listen_fd, addr);
+ events--;
+ }
+
+ for (i = 0; events && i < nconns; i++) {
+ if (pfds[i + 1].revents & POLLIN) {
+ net_client_data(ncs[i]);
+ events--;
+ }
+ }
+ }
+}
+
/*
* Start here when we are in server mode - just fetch data from the network
* and dump to files
*/
static int net_server(void)
{
- struct device_information *dip;
- struct thread_information *tip;
struct sockaddr_in addr;
- socklen_t socklen;
- int fd, opt, i, j;
+ int fd, opt;
fd = socket(AF_INET, SOCK_STREAM, 0);
if (fd < 0) {
return 1;
}
-repeat:
- signal(SIGINT, NULL);
- signal(SIGHUP, NULL);
- signal(SIGTERM, NULL);
- signal(SIGALRM, NULL);
-
- printf("blktrace: waiting for incoming connection...\n");
-
- socklen = sizeof(addr);
- net_in_fd = accept(fd, (struct sockaddr *) &addr, &socklen);
- if (net_in_fd < 0) {
- perror("accept");
- return 1;
- }
-
- signal(SIGINT, handle_sigint);
- signal(SIGHUP, handle_sigint);
- signal(SIGTERM, handle_sigint);
- signal(SIGALRM, handle_sigint);
-
- printf("blktrace: connection from %s\n", inet_ntoa(addr.sin_addr));
-
- while (!is_done()) {
- if (net_server_loop(&addr.sin_addr))
- break;
- }
-
- for_each_dip(dip, i)
- for_each_tip(dip, tip, j)
- tip_ftrunc_final(tip);
-
- show_stats();
-
- if (is_done())
- return 0;
-
- /*
- * cleanup for next run
- */
- for_each_dip(dip, i) {
- for_each_tip(dip, tip, j)
- fclose(tip->ofile);
-
- free(dip->threads);
- free(dip->path);
- }
-
- free(device_information);
- device_information = NULL;
- ncpus = ndevs = 0;
-
- close(net_in_fd);
- net_in_fd = -1;
- stat_shown = 0;
- goto repeat;
+ net_server_handle_connections(fd, &addr);
+ return 0;
}
/*
* Setup outgoing network connection where we will transmit data
*/
-static int net_setup_client(void)
+static int net_setup_client_cpu(int i, struct sockaddr_in *addr)
{
- struct sockaddr_in addr;
int fd;
fd = socket(AF_INET, SOCK_STREAM, 0);
return 1;
}
+ if (connect(fd, (struct sockaddr *) addr, sizeof(*addr)) < 0) {
+ perror("client: connect");
+ return 1;
+ }
+
+ net_out_fd[i] = fd;
+ return 0;
+}
+
+static int net_setup_client(void)
+{
+ struct sockaddr_in addr;
+ int i;
+
memset(&addr, 0, sizeof(addr));
addr.sin_family = AF_INET;
addr.sin_port = htons(net_port);
printf("blktrace: connecting to %s\n", hostname);
- if (connect(fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
- perror("client: connect");
- return 1;
+ net_out_fd = malloc(ncpus * sizeof(*net_out_fd));
+ for (i = 0; i < ncpus; i++) {
+ if (net_setup_client_cpu(i, &addr))
+ return 1;
}
printf("blktrace: connected!\n");
- net_out_fd = fd;
+
return 0;
}
static char usage_str[] = \
- "-d <dev> [ -r relay path ] [ -o <output> ] [-k ] [ -w time ]\n" \
- "[ -a action ] [ -A action mask ] [ -v ]\n\n" \
+ "-d <dev> [ -r debugfs path ] [ -o <output> ] [-k ] [ -w time ]\n" \
+ "[ -a action ] [ -A action mask ] [ -I <devs file> ] [ -v ]\n\n" \
"\t-d Use specified device. May also be given last after options\n" \
- "\t-r Path to mounted relayfs, defaults to /relay\n" \
+ "\t-r Path to mounted debugfs, defaults to /sys/kernel/debug\n" \
"\t-o File(s) to send output to\n" \
"\t-D Directory to prepend to output file names\n" \
"\t-k Kill a running trace\n" \
"\t-l Run in network listen mode (blktrace server)\n" \
"\t-h Run in network client mode, connecting to the given host\n" \
"\t-p Network port to use (default 8462)\n" \
- "\t-s Make the network client use sendfile() to transfer data\n" \
+ "\t-s Make the network client NOT use sendfile() to transfer data\n" \
+ "\t-I Add devices found in <devs file>\n" \
"\t-V Print program version info\n\n";
static void show_usage(char *program)
int main(int argc, char *argv[])
{
- static char default_relay_path[] = "/relay";
+ static char default_debugfs_path[] = "/sys/kernel/debug";
struct statfs st;
int i, c;
int stop_watch = 0;
return 1;
break;
+ case 'I': {
+ char dev_line[256];
+ FILE *ifp = fopen(optarg, "r");
+
+ if (!ifp) {
+ fprintf(stderr,
+ "Invalid file for devices %s\n",
+ optarg);
+ return 1;
+ }
+
+ while (fscanf(ifp, "%s\n", dev_line) == 1)
+ if (resize_devices(strdup(dev_line)) != 0)
+ return 1;
+ break;
+ }
+
+
case 'r':
- relay_path = optarg;
+ debugfs_path = optarg;
break;
case 'o':
net_port = atoi(optarg);
break;
case 's':
- net_use_sendfile = 1;
+ net_use_sendfile = 0;
break;
default:
show_usage(argv[0]);
page_size = getpagesize();
- if (net_mode == Net_server)
+ if (net_mode == Net_server) {
+ if (output_name) {
+ fprintf(stderr, "-o ignored in server mode\n");
+ output_name = NULL;
+ }
+
return net_server();
+ }
while (optind < argc) {
if (resize_devices(argv[optind++]) != 0)
return 1;
}
- if (!relay_path)
- relay_path = default_relay_path;
-
if (act_mask_tmp != 0)
act_mask = act_mask_tmp;
- if (statfs(relay_path, &st) < 0) {
+ if (!debugfs_path)
+ debugfs_path = default_debugfs_path;
+
+ if (statfs(debugfs_path, &st) < 0) {
perror("statfs");
fprintf(stderr,"%s does not appear to be a valid path\n",
- relay_path);
+ debugfs_path);
return 1;
- } else if (st.f_type != (long) RELAYFS_TYPE) {
- fprintf(stderr,"%s does not appear to be a relay filesystem\n",
- relay_path);
+ } else if (st.f_type != (long) DEBUGFS_TYPE) {
+ fprintf(stderr,"%s does not appear to be a debug filesystem\n",
+ debugfs_path);
return 1;
}
signal(SIGHUP, handle_sigint);
signal(SIGTERM, handle_sigint);
signal(SIGALRM, handle_sigint);
+ signal(SIGPIPE, SIG_IGN);
if (net_mode == Net_client && net_setup_client())
return 1;
stop_all_traces();
}
- show_stats();
+ show_stats(device_information, ndevs, ncpus);
return 0;
}